/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
58 /* For lang_hooks.types.type_for_mode. */
59 #include "langhooks.h"
61 /* Return the vectorized type for the given statement. */
64 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
66 return STMT_VINFO_VECTYPE (stmt_info
);
69 /* Return TRUE iff the given statement is in an inner loop relative to
70 the loop being vectorized. */
72 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
74 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
75 basic_block bb
= gimple_bb (stmt
);
76 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
82 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
84 return (bb
->loop_father
== loop
->inner
);
87 /* Record the cost of a statement, either by directly informing the
88 target model or by saving it in a vector for later processing.
89 Return a preliminary estimate of the statement's cost. */
92 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
93 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
94 int misalign
, enum vect_cost_model_location where
)
96 if ((kind
== vector_load
|| kind
== unaligned_load
)
97 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
98 kind
= vector_gather_load
;
99 if ((kind
== vector_store
|| kind
== unaligned_store
)
100 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
101 kind
= vector_scatter_store
;
103 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
104 body_cost_vec
->safe_push (si
);
106 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
108 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
116 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT_INFO and the vector is associated
123 with scalar destination SCALAR_DEST. */
126 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
127 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
129 tree vect_type
, vect
, vect_name
, array_ref
;
132 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
133 vect_type
= TREE_TYPE (TREE_TYPE (array
));
134 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
135 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
136 build_int_cst (size_type_node
, n
),
137 NULL_TREE
, NULL_TREE
);
139 new_stmt
= gimple_build_assign (vect
, array_ref
);
140 vect_name
= make_ssa_name (vect
, new_stmt
);
141 gimple_assign_set_lhs (new_stmt
, vect_name
);
142 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT_INFO. */
152 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
153 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
158 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
159 build_int_cst (size_type_node
, n
),
160 NULL_TREE
, NULL_TREE
);
162 new_stmt
= gimple_build_assign (array_ref
, vect
);
163 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
171 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
175 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
181 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
182 Emit the clobber before *GSI. */
185 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
188 tree clobber
= build_clobber (TREE_TYPE (var
));
189 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
190 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
193 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
195 /* Function vect_mark_relevant.
197 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
200 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
201 enum vect_relevant relevant
, bool live_p
)
203 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
204 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
206 if (dump_enabled_p ())
207 dump_printf_loc (MSG_NOTE
, vect_location
,
208 "mark relevant %d, live %d: %G", relevant
, live_p
,
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_NOTE
, vect_location
,
224 "last stmt in pattern. don't mark"
225 " relevant/live.\n");
226 stmt_vec_info old_stmt_info
= stmt_info
;
227 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
228 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
229 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
230 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
233 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
234 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
235 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
237 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
238 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
240 if (dump_enabled_p ())
241 dump_printf_loc (MSG_NOTE
, vect_location
,
242 "already marked relevant/live.\n");
246 worklist
->safe_push (stmt_info
);
250 /* Function is_simple_and_all_uses_invariant
252 Return true if STMT_INFO is simple and all uses of it are invariant. */
255 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
256 loop_vec_info loop_vinfo
)
261 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
265 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
267 enum vect_def_type dt
= vect_uninitialized_def
;
269 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
273 "use not simple.\n");
277 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
286 is "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - control stmts in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
296 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
297 enum vect_relevant
*relevant
, bool *live_p
)
299 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
301 imm_use_iterator imm_iter
;
305 *relevant
= vect_unused_in_scope
;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt_info
->stmt
)
310 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
311 *relevant
= vect_used_in_scope
;
313 /* changing memory. */
314 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
315 if (gimple_vdef (stmt_info
->stmt
)
316 && !gimple_clobber_p (stmt_info
->stmt
))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE
, vect_location
,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant
= vect_used_in_scope
;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
327 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
329 basic_block bb
= gimple_bb (USE_STMT (use_p
));
330 if (!flow_bb_inside_loop_p (loop
, bb
))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE
, vect_location
,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p
)))
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
342 gcc_assert (bb
== single_exit (loop
)->dest
);
349 if (*live_p
&& *relevant
== vect_unused_in_scope
350 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE
, vect_location
,
354 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355 *relevant
= vect_used_only_live
;
358 return (*live_p
|| *relevant
);
362 /* Function exist_non_indexing_operands_for_use_p
364 USE is one of the uses attached to STMT_INFO. Check if USE is
365 used in STMT_INFO for anything other than indexing an array. */
368 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
372 /* USE corresponds to some operand in STMT. If there is no data
373 reference in STMT, then any operand that corresponds to USE
374 is not indexing an array. */
375 if (!STMT_VINFO_DATA_REF (stmt_info
))
378 /* STMT has a data_ref. FORNOW this means that its of one of
382 (This should have been verified in analyze_data_refs).
384 'var' in the second case corresponds to a def, not a use,
385 so USE cannot correspond to any operands that are not used
388 Therefore, all we need to check is if STMT falls into the
389 first case, and whether var corresponds to USE. */
391 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
392 if (!assign
|| !gimple_assign_copy_p (assign
))
394 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
395 if (call
&& gimple_call_internal_p (call
))
397 internal_fn ifn
= gimple_call_internal_fn (call
);
398 int mask_index
= internal_fn_mask_index (ifn
);
400 && use
== gimple_call_arg (call
, mask_index
))
402 int stored_value_index
= internal_fn_stored_value_index (ifn
);
403 if (stored_value_index
>= 0
404 && use
== gimple_call_arg (call
, stored_value_index
))
406 if (internal_gather_scatter_fn_p (ifn
)
407 && use
== gimple_call_arg (call
, 1))
413 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
415 operand
= gimple_assign_rhs1 (assign
);
416 if (TREE_CODE (operand
) != SSA_NAME
)
427 Function process_use.
430 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
431 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
432 that defined USE. This is done by calling mark_relevant and passing it
433 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
434 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 Generally, LIVE_P and RELEVANT are used to define the liveness and
439 relevance info of the DEF_STMT of this USE:
440 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
441 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 - case 1: If USE is used only for address computations (e.g. array indexing),
444 which does not need to be directly vectorized, then the liveness/relevance
445 of the respective DEF_STMT is left unchanged.
446 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
447 we skip DEF_STMT cause it had already been processed.
448 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
449 "relevant" will be modified accordingly.
451 Return true if everything is as expected. Return false otherwise. */
454 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
455 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
458 stmt_vec_info dstmt_vinfo
;
459 basic_block bb
, def_bb
;
460 enum vect_def_type dt
;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
465 return opt_result::success ();
467 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
468 return opt_result::failure_at (stmt_vinfo
->stmt
,
470 " unsupported use in stmt.\n");
473 return opt_result::success ();
475 def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
477 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
478 DSTMT_VINFO must have already been processed, because this should be the
479 only way that STMT, which is a reduction-phi, was put in the worklist,
480 as there should be no other uses for DSTMT_VINFO in the loop. So we just
481 check that everything is as expected, and we are done. */
482 bb
= gimple_bb (stmt_vinfo
->stmt
);
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
493 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
494 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
495 return opt_result::success ();
498 /* case 3a: outer-loop stmt defining an inner-loop stmt:
499 outer-loop-header-bb:
505 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE
, vect_location
,
509 "outer-loop def-stmt defining inner-loop stmt.\n");
513 case vect_unused_in_scope
:
514 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
515 vect_used_in_scope
: vect_unused_in_scope
;
518 case vect_used_in_outer_by_reduction
:
519 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
520 relevant
= vect_used_by_reduction
;
523 case vect_used_in_outer
:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
525 relevant
= vect_used_in_scope
;
528 case vect_used_in_scope
:
536 /* case 3b: inner-loop stmt defining an outer-loop stmt:
537 outer-loop-header-bb:
541 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
543 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
545 if (dump_enabled_p ())
546 dump_printf_loc (MSG_NOTE
, vect_location
,
547 "inner-loop def-stmt defining outer-loop stmt.\n");
551 case vect_unused_in_scope
:
552 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
553 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
554 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
557 case vect_used_by_reduction
:
558 case vect_used_only_live
:
559 relevant
= vect_used_in_outer_by_reduction
;
562 case vect_used_in_scope
:
563 relevant
= vect_used_in_outer
;
570 /* We are also not interested in uses on loop PHI backedges that are
571 inductions. Otherwise we'll needlessly vectorize the IV increment
572 and cause hybrid SLP for SLP inductions. Unless the PHI is live
574 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
575 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
576 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
577 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
578 loop_latch_edge (bb
->loop_father
))
581 if (dump_enabled_p ())
582 dump_printf_loc (MSG_NOTE
, vect_location
,
583 "induction value on backedge.\n");
584 return opt_result::success ();
588 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
589 return opt_result::success ();
593 /* Function vect_mark_stmts_to_be_vectorized.
595 Not all stmts in the loop need to be vectorized. For example:
604 Stmt 1 and 3 do not need to be vectorized, because loop control and
605 addressing of vectorized data-refs are handled differently.
607 This pass detects such stmts. */
610 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
612 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
613 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
614 unsigned int nbbs
= loop
->num_nodes
;
615 gimple_stmt_iterator si
;
619 enum vect_relevant relevant
;
621 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
623 auto_vec
<stmt_vec_info
, 64> worklist
;
625 /* 1. Init worklist. */
626 for (i
= 0; i
< nbbs
; i
++)
629 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
631 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
632 if (dump_enabled_p ())
633 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
636 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
637 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
639 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
641 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "init: stmt relevant? %G", stmt_info
->stmt
);
646 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
647 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
651 /* 2. Process_worklist */
652 while (worklist
.length () > 0)
657 stmt_vec_info stmt_vinfo
= worklist
.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
680 case vect_reduction_def
:
681 gcc_assert (relevant
!= vect_unused_in_scope
);
682 if (relevant
!= vect_unused_in_scope
683 && relevant
!= vect_used_in_scope
684 && relevant
!= vect_used_by_reduction
685 && relevant
!= vect_used_only_live
)
686 return opt_result::failure_at
687 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
690 case vect_nested_cycle
:
691 if (relevant
!= vect_unused_in_scope
692 && relevant
!= vect_used_in_outer_by_reduction
693 && relevant
!= vect_used_in_outer
)
694 return opt_result::failure_at
695 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
698 case vect_double_reduction_def
:
699 if (relevant
!= vect_unused_in_scope
700 && relevant
!= vect_used_by_reduction
701 && relevant
!= vect_used_only_live
)
702 return opt_result::failure_at
703 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
710 if (is_pattern_stmt_p (stmt_vinfo
))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
717 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
718 tree op
= gimple_assign_rhs1 (assign
);
721 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
724 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
725 loop_vinfo
, relevant
, &worklist
, false);
728 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
729 loop_vinfo
, relevant
, &worklist
, false);
734 for (; i
< gimple_num_ops (assign
); i
++)
736 op
= gimple_op (assign
, i
);
737 if (TREE_CODE (op
) == SSA_NAME
)
740 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
747 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
749 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
751 tree arg
= gimple_call_arg (call
, i
);
753 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
765 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
773 gather_scatter_info gs_info
;
774 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
777 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
782 } /* while worklist */
784 return opt_result::success ();
787 /* Compute the prologue cost for invariant or constant operands. */
790 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
791 unsigned opno
, enum vect_def_type dt
,
792 stmt_vector_for_cost
*cost_vec
)
794 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
795 tree op
= gimple_op (stmt
, opno
);
796 unsigned prologue_cost
= 0;
798 /* Without looking at the actual initializer a vector of
799 constants can be implemented as load from the constant pool.
800 When all elements are the same we can use a splat. */
801 tree vectype
= get_vectype_for_scalar_type (TREE_TYPE (op
));
802 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
803 unsigned num_vects_to_check
;
804 unsigned HOST_WIDE_INT const_nunits
;
806 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
807 && ! multiple_p (const_nunits
, group_size
))
809 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
810 nelt_limit
= const_nunits
;
814 /* If either the vector has variable length or the vectors
815 are composed of repeated whole groups we only need to
816 cost construction once. All vectors will be the same. */
817 num_vects_to_check
= 1;
818 nelt_limit
= group_size
;
820 tree elt
= NULL_TREE
;
822 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
824 unsigned si
= j
% group_size
;
826 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
827 /* ??? We're just tracking whether all operands of a single
828 vector initializer are the same, ideally we'd check if
829 we emitted the same one already. */
830 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
834 if (nelt
== nelt_limit
)
836 /* ??? We need to pass down stmt_info for a vector type
837 even if it points to the wrong stmt. */
838 prologue_cost
+= record_stmt_cost
840 dt
== vect_external_def
841 ? (elt
? scalar_to_vec
: vec_construct
)
843 stmt_info
, 0, vect_prologue
);
848 return prologue_cost
;
851 /* Function vect_model_simple_cost.
853 Models cost for simple operations, i.e. those that only emit ncopies of a
854 single op. Right now, this does not account for multiple insns that could
855 be generated for the single vector op. We will handle that shortly. */
858 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
859 enum vect_def_type
*dt
,
862 stmt_vector_for_cost
*cost_vec
)
864 int inside_cost
= 0, prologue_cost
= 0;
866 gcc_assert (cost_vec
!= NULL
);
868 /* ??? Somehow we need to fix this at the callers. */
870 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
874 /* Scan operands and account for prologue cost of constants/externals.
875 ??? This over-estimates cost for multiple uses and should be
877 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
878 tree lhs
= gimple_get_lhs (stmt
);
879 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
881 tree op
= gimple_op (stmt
, i
);
882 enum vect_def_type dt
;
883 if (!op
|| op
== lhs
)
885 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
886 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
887 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
892 /* Cost the "broadcast" of a scalar operand in to a vector operand.
893 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
895 for (int i
= 0; i
< ndts
; i
++)
896 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
897 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
898 stmt_info
, 0, vect_prologue
);
900 /* Adjust for two-operator SLP nodes. */
901 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
904 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
905 stmt_info
, 0, vect_body
);
908 /* Pass the inside-of-loop statements to the target-specific cost model. */
909 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
910 stmt_info
, 0, vect_body
);
912 if (dump_enabled_p ())
913 dump_printf_loc (MSG_NOTE
, vect_location
,
914 "vect_model_simple_cost: inside_cost = %d, "
915 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
919 /* Model cost for type demotion and promotion operations. PWR is normally
920 zero for single-step promotions and demotions. It will be one if
921 two-step promotion/demotion is required, and so on. Each additional
922 step doubles the number of instructions required. */
925 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
926 enum vect_def_type
*dt
, int pwr
,
927 stmt_vector_for_cost
*cost_vec
)
930 int inside_cost
= 0, prologue_cost
= 0;
932 for (i
= 0; i
< pwr
+ 1; i
++)
934 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
936 inside_cost
+= record_stmt_cost (cost_vec
, vect_pow2 (tmp
),
937 vec_promote_demote
, stmt_info
, 0,
941 /* FORNOW: Assuming maximum 2 args per stmts. */
942 for (i
= 0; i
< 2; i
++)
943 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
944 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
945 stmt_info
, 0, vect_prologue
);
947 if (dump_enabled_p ())
948 dump_printf_loc (MSG_NOTE
, vect_location
,
949 "vect_model_promotion_demotion_cost: inside_cost = %d, "
950 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
953 /* Returns true if the current function returns DECL. */
956 cfun_returns (tree decl
)
960 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
962 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
965 if (gimple_return_retval (ret
) == decl
)
967 /* We often end up with an aggregate copy to the result decl,
968 handle that case as well. First skip intermediate clobbers
973 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
975 while (gimple_clobber_p (def
));
976 if (is_a
<gassign
*> (def
)
977 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
978 && gimple_assign_rhs1 (def
) == decl
)
984 /* Function vect_model_store_cost
986 Models cost for stores. In the case of grouped accesses, one access
987 has the overhead of the grouped access attributed to it. */
990 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
991 enum vect_def_type dt
,
992 vect_memory_access_type memory_access_type
,
993 vec_load_store_type vls_type
, slp_tree slp_node
,
994 stmt_vector_for_cost
*cost_vec
)
996 unsigned int inside_cost
= 0, prologue_cost
= 0;
997 stmt_vec_info first_stmt_info
= stmt_info
;
998 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1000 /* ??? Somehow we need to fix this at the callers. */
1002 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1004 if (vls_type
== VLS_STORE_INVARIANT
)
1007 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
1010 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1011 stmt_info
, 0, vect_prologue
);
1014 /* Grouped stores update all elements in the group at once,
1015 so we want the DR for the first statement. */
1016 if (!slp_node
&& grouped_access_p
)
1017 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1019 /* True if we should include any once-per-group costs as well as
1020 the cost of the statement itself. For SLP we only get called
1021 once per group anyhow. */
1022 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1024 /* We assume that the cost of a single store-lanes instruction is
1025 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1026 access is instead being provided by a permute-and-store operation,
1027 include the cost of the permutes. */
1029 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1031 /* Uses a high and low interleave or shuffle operations for each
1033 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1034 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1035 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1036 stmt_info
, 0, vect_body
);
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_NOTE
, vect_location
,
1040 "vect_model_store_cost: strided group_size = %d .\n",
1044 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1045 /* Costs of the stores. */
1046 if (memory_access_type
== VMAT_ELEMENTWISE
1047 || memory_access_type
== VMAT_GATHER_SCATTER
)
1049 /* N scalar stores plus extracting the elements. */
1050 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1051 inside_cost
+= record_stmt_cost (cost_vec
,
1052 ncopies
* assumed_nunits
,
1053 scalar_store
, stmt_info
, 0, vect_body
);
1056 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1058 if (memory_access_type
== VMAT_ELEMENTWISE
1059 || memory_access_type
== VMAT_STRIDED_SLP
)
1061 /* N scalar stores plus extracting the elements. */
1062 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1063 inside_cost
+= record_stmt_cost (cost_vec
,
1064 ncopies
* assumed_nunits
,
1065 vec_to_scalar
, stmt_info
, 0, vect_body
);
1068 /* When vectorizing a store into the function result assign
1069 a penalty if the function returns in a multi-register location.
1070 In this case we assume we'll end up with having to spill the
1071 vector result and do piecewise loads as a conservative estimate. */
1072 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1074 && (TREE_CODE (base
) == RESULT_DECL
1075 || (DECL_P (base
) && cfun_returns (base
)))
1076 && !aggregate_value_p (base
, cfun
->decl
))
1078 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1079 /* ??? Handle PARALLEL in some way. */
1082 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1083 /* Assume that a single reg-reg move is possible and cheap,
1084 do not account for vector to gp register move cost. */
1088 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1090 stmt_info
, 0, vect_epilogue
);
1092 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1094 stmt_info
, 0, vect_epilogue
);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE
, vect_location
,
1101 "vect_model_store_cost: inside_cost = %d, "
1102 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1106 /* Calculate cost of DR's memory access. */
1108 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1109 unsigned int *inside_cost
,
1110 stmt_vector_for_cost
*body_cost_vec
)
1112 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1113 int alignment_support_scheme
1114 = vect_supportable_dr_alignment (dr_info
, false);
1116 switch (alignment_support_scheme
)
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1121 vector_store
, stmt_info
, 0,
1124 if (dump_enabled_p ())
1125 dump_printf_loc (MSG_NOTE
, vect_location
,
1126 "vect_model_store_cost: aligned.\n");
1130 case dr_unaligned_supported
:
1132 /* Here, we assign an additional cost for the unaligned store. */
1133 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1134 unaligned_store
, stmt_info
,
1135 DR_MISALIGNMENT (dr_info
),
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_store_cost: unaligned supported by "
1144 case dr_unaligned_unsupported
:
1146 *inside_cost
= VECT_MAX_COST
;
1148 if (dump_enabled_p ())
1149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1150 "vect_model_store_cost: unsupported access.\n");
1160 /* Function vect_model_load_cost
1162 Models cost for loads. In the case of grouped accesses, one access has
1163 the overhead of the grouped access attributed to it. Since unaligned
1164 accesses are supported for loads, we also account for the costs of the
1165 access scheme chosen. */
1168 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1169 vect_memory_access_type memory_access_type
,
1170 slp_instance instance
,
1172 stmt_vector_for_cost
*cost_vec
)
1174 unsigned int inside_cost
= 0, prologue_cost
= 0;
1175 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1177 gcc_assert (cost_vec
);
1179 /* ??? Somehow we need to fix this at the callers. */
1181 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1183 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1185 /* If the load is permuted then the alignment is determined by
1186 the first group element not by the first scalar stmt DR. */
1187 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1188 /* Record the cost for the permutation. */
1190 unsigned assumed_nunits
1191 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1192 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1193 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1194 slp_vf
, instance
, true,
1196 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1197 first_stmt_info
, 0, vect_body
);
1198 /* And adjust the number of loads performed. This handles
1199 redundancies as well as loads that are later dead. */
1200 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1201 bitmap_clear (perm
);
1202 for (unsigned i
= 0;
1203 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1204 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1206 bool load_seen
= false;
1207 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1209 if (i
% assumed_nunits
== 0)
1215 if (bitmap_bit_p (perm
, i
))
1221 <= (DR_GROUP_SIZE (first_stmt_info
)
1222 - DR_GROUP_GAP (first_stmt_info
)
1223 + assumed_nunits
- 1) / assumed_nunits
);
1226 /* Grouped loads read all elements in the group at once,
1227 so we want the DR for the first statement. */
1228 stmt_vec_info first_stmt_info
= stmt_info
;
1229 if (!slp_node
&& grouped_access_p
)
1230 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1232 /* True if we should include any once-per-group costs as well as
1233 the cost of the statement itself. For SLP we only get called
1234 once per group anyhow. */
1235 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1237 /* We assume that the cost of a single load-lanes instruction is
1238 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1239 access is instead being provided by a load-and-permute operation,
1240 include the cost of the permutes. */
1242 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1244 /* Uses an even and odd extract operations or shuffle operations
1245 for each needed permute. */
1246 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1247 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1248 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1249 stmt_info
, 0, vect_body
);
1251 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE
, vect_location
,
1253 "vect_model_load_cost: strided group_size = %d .\n",
1257 /* The loads themselves. */
1258 if (memory_access_type
== VMAT_ELEMENTWISE
1259 || memory_access_type
== VMAT_GATHER_SCATTER
)
1261 /* N scalar loads plus gathering them into a vector. */
1262 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1263 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1264 inside_cost
+= record_stmt_cost (cost_vec
,
1265 ncopies
* assumed_nunits
,
1266 scalar_load
, stmt_info
, 0, vect_body
);
1269 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1270 &inside_cost
, &prologue_cost
,
1271 cost_vec
, cost_vec
, true);
1272 if (memory_access_type
== VMAT_ELEMENTWISE
1273 || memory_access_type
== VMAT_STRIDED_SLP
)
1274 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1275 stmt_info
, 0, vect_body
);
1277 if (dump_enabled_p ())
1278 dump_printf_loc (MSG_NOTE
, vect_location
,
1279 "vect_model_load_cost: inside_cost = %d, "
1280 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1284 /* Calculate cost of DR's memory access. */
1286 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1287 bool add_realign_cost
, unsigned int *inside_cost
,
1288 unsigned int *prologue_cost
,
1289 stmt_vector_for_cost
*prologue_cost_vec
,
1290 stmt_vector_for_cost
*body_cost_vec
,
1291 bool record_prologue_costs
)
1293 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1294 int alignment_support_scheme
1295 = vect_supportable_dr_alignment (dr_info
, false);
1297 switch (alignment_support_scheme
)
1301 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1302 stmt_info
, 0, vect_body
);
1304 if (dump_enabled_p ())
1305 dump_printf_loc (MSG_NOTE
, vect_location
,
1306 "vect_model_load_cost: aligned.\n");
1310 case dr_unaligned_supported
:
1312 /* Here, we assign an additional cost for the unaligned load. */
1313 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1314 unaligned_load
, stmt_info
,
1315 DR_MISALIGNMENT (dr_info
),
1318 if (dump_enabled_p ())
1319 dump_printf_loc (MSG_NOTE
, vect_location
,
1320 "vect_model_load_cost: unaligned supported by "
1325 case dr_explicit_realign
:
1327 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1328 vector_load
, stmt_info
, 0, vect_body
);
1329 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1330 vec_perm
, stmt_info
, 0, vect_body
);
1332 /* FIXME: If the misalignment remains fixed across the iterations of
1333 the containing loop, the following cost should be added to the
1335 if (targetm
.vectorize
.builtin_mask_for_load
)
1336 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1337 stmt_info
, 0, vect_body
);
1339 if (dump_enabled_p ())
1340 dump_printf_loc (MSG_NOTE
, vect_location
,
1341 "vect_model_load_cost: explicit realign\n");
1345 case dr_explicit_realign_optimized
:
1347 if (dump_enabled_p ())
1348 dump_printf_loc (MSG_NOTE
, vect_location
,
1349 "vect_model_load_cost: unaligned software "
1352 /* Unaligned software pipeline has a load of an address, an initial
1353 load, and possibly a mask operation to "prime" the loop. However,
1354 if this is an access in a group of loads, which provide grouped
1355 access, then the above cost should only be considered for one
1356 access in the group. Inside the loop, there is a load op
1357 and a realignment op. */
1359 if (add_realign_cost
&& record_prologue_costs
)
1361 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1362 vector_stmt
, stmt_info
,
1364 if (targetm
.vectorize
.builtin_mask_for_load
)
1365 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1366 vector_stmt
, stmt_info
,
1370 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1371 stmt_info
, 0, vect_body
);
1372 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1373 stmt_info
, 0, vect_body
);
1375 if (dump_enabled_p ())
1376 dump_printf_loc (MSG_NOTE
, vect_location
,
1377 "vect_model_load_cost: explicit realign optimized"
1383 case dr_unaligned_unsupported
:
1385 *inside_cost
= VECT_MAX_COST
;
1387 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1389 "vect_model_load_cost: unsupported access.\n");
1398 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1399 the loop preheader for the vectorized stmt STMT_VINFO. */
1402 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1403 gimple_stmt_iterator
*gsi
)
1406 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
1409 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1413 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1417 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1420 pe
= loop_preheader_edge (loop
);
1421 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1422 gcc_assert (!new_bb
);
1426 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1428 gimple_stmt_iterator gsi_bb_start
;
1430 gcc_assert (bb_vinfo
);
1431 bb
= BB_VINFO_BB (bb_vinfo
);
1432 gsi_bb_start
= gsi_after_labels (bb
);
1433 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1437 if (dump_enabled_p ())
1438 dump_printf_loc (MSG_NOTE
, vect_location
,
1439 "created new init_stmt: %G", new_stmt
);
1442 /* Function vect_init_vector.
1444 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1445 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1446 vector type a vector with all elements equal to VAL is created first.
1447 Place the initialization at BSI if it is not NULL. Otherwise, place the
1448 initialization at the loop preheader.
1449 Return the DEF of INIT_STMT.
1450 It will be used in the vectorization of STMT_INFO. */
1453 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1454 gimple_stmt_iterator
*gsi
)
1459 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1460 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1462 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1463 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1465 /* Scalar boolean value should be transformed into
1466 all zeros or all ones value before building a vector. */
1467 if (VECTOR_BOOLEAN_TYPE_P (type
))
1469 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1470 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1472 if (CONSTANT_CLASS_P (val
))
1473 val
= integer_zerop (val
) ? false_val
: true_val
;
1476 new_temp
= make_ssa_name (TREE_TYPE (type
));
1477 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1478 val
, true_val
, false_val
);
1479 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1483 else if (CONSTANT_CLASS_P (val
))
1484 val
= fold_convert (TREE_TYPE (type
), val
);
1487 new_temp
= make_ssa_name (TREE_TYPE (type
));
1488 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1489 init_stmt
= gimple_build_assign (new_temp
,
1490 fold_build1 (VIEW_CONVERT_EXPR
,
1494 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1495 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1499 val
= build_vector_from_val (type
, val
);
1502 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1503 init_stmt
= gimple_build_assign (new_temp
, val
);
1504 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1508 /* Function vect_get_vec_def_for_operand_1.
1510 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1511 with type DT that will be used in the vectorized stmt. */
1514 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1515 enum vect_def_type dt
)
1518 stmt_vec_info vec_stmt_info
;
1522 /* operand is a constant or a loop invariant. */
1523 case vect_constant_def
:
1524 case vect_external_def
:
1525 /* Code should use vect_get_vec_def_for_operand. */
1528 /* Operand is defined by a loop header phi. In case of nested
1529 cycles we also may have uses of the backedge def. */
1530 case vect_reduction_def
:
1531 case vect_double_reduction_def
:
1532 case vect_nested_cycle
:
1533 case vect_induction_def
:
1534 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
1535 || dt
== vect_nested_cycle
);
1538 /* operand is defined inside the loop. */
1539 case vect_internal_def
:
1541 /* Get the def from the vectorized stmt. */
1542 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1543 /* Get vectorized pattern statement. */
1545 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1546 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1547 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1548 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1549 gcc_assert (vec_stmt_info
);
1550 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1551 vec_oprnd
= PHI_RESULT (phi
);
1553 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1563 /* Function vect_get_vec_def_for_operand.
1565 OP is an operand in STMT_VINFO. This function returns a (vector) def
1566 that will be used in the vectorized stmt for STMT_VINFO.
1568 In the case that OP is an SSA_NAME which is defined in the loop, then
1569 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1571 In case OP is an invariant or constant, a new stmt that creates a vector def
1572 needs to be introduced. VECTYPE may be used to specify a required type for
1573 vector invariant. */
1576 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1579 enum vect_def_type dt
;
1581 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1583 if (dump_enabled_p ())
1584 dump_printf_loc (MSG_NOTE
, vect_location
,
1585 "vect_get_vec_def_for_operand: %T\n", op
);
1587 stmt_vec_info def_stmt_info
;
1588 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1589 &def_stmt_info
, &def_stmt
);
1590 gcc_assert (is_simple_use
);
1591 if (def_stmt
&& dump_enabled_p ())
1592 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1594 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1596 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1600 vector_type
= vectype
;
1601 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1602 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1603 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1605 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1607 gcc_assert (vector_type
);
1608 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
1611 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1615 /* Function vect_get_vec_def_for_stmt_copy
1617 Return a vector-def for an operand. This function is used when the
1618 vectorized stmt to be created (by the caller to this function) is a "copy"
1619 created in case the vectorized result cannot fit in one vector, and several
1620 copies of the vector-stmt are required. In this case the vector-def is
1621 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1622 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1625 In case the vectorization factor (VF) is bigger than the number
1626 of elements that can fit in a vectype (nunits), we have to generate
1627 more than one vector stmt to vectorize the scalar stmt. This situation
1628 arises when there are multiple data-types operated upon in the loop; the
1629 smallest data-type determines the VF, and as a result, when vectorizing
1630 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1631 vector stmt (each computing a vector of 'nunits' results, and together
1632 computing 'VF' results in each iteration). This function is called when
1633 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1634 which VF=16 and nunits=4, so the number of copies required is 4):
1636 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1638 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1639 VS1.1: vx.1 = memref1 VS1.2
1640 VS1.2: vx.2 = memref2 VS1.3
1641 VS1.3: vx.3 = memref3
1643 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1644 VSnew.1: vz1 = vx.1 + ... VSnew.2
1645 VSnew.2: vz2 = vx.2 + ... VSnew.3
1646 VSnew.3: vz3 = vx.3 + ...
1648 The vectorization of S1 is explained in vectorizable_load.
1649 The vectorization of S2:
1650 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1651 the function 'vect_get_vec_def_for_operand' is called to
1652 get the relevant vector-def for each operand of S2. For operand x it
1653 returns the vector-def 'vx.0'.
1655 To create the remaining copies of the vector-stmt (VSnew.j), this
1656 function is called to get the relevant vector-def for each operand. It is
1657 obtained from the respective VS1.j stmt, which is recorded in the
1658 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1660 For example, to obtain the vector-def 'vx.1' in order to create the
1661 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1662 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1663 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1664 and return its def ('vx.1').
1665 Overall, to create the above sequence this function will be called 3 times:
1666 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1667 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1668 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1671 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1673 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1675 /* Do nothing; can reuse same def. */
1678 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1679 gcc_assert (def_stmt_info
);
1680 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1681 vec_oprnd
= PHI_RESULT (phi
);
1683 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1688 /* Get vectorized definitions for the operands to create a copy of an original
1689 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1692 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1693 vec
<tree
> *vec_oprnds0
,
1694 vec
<tree
> *vec_oprnds1
)
1696 tree vec_oprnd
= vec_oprnds0
->pop ();
1698 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1699 vec_oprnds0
->quick_push (vec_oprnd
);
1701 if (vec_oprnds1
&& vec_oprnds1
->length ())
1703 vec_oprnd
= vec_oprnds1
->pop ();
1704 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1705 vec_oprnds1
->quick_push (vec_oprnd
);
1710 /* Get vectorized definitions for OP0 and OP1. */
1713 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1714 vec
<tree
> *vec_oprnds0
,
1715 vec
<tree
> *vec_oprnds1
,
1720 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1721 auto_vec
<tree
> ops (nops
);
1722 auto_vec
<vec
<tree
> > vec_defs (nops
);
1724 ops
.quick_push (op0
);
1726 ops
.quick_push (op1
);
1728 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1730 *vec_oprnds0
= vec_defs
[0];
1732 *vec_oprnds1
= vec_defs
[1];
1738 vec_oprnds0
->create (1);
1739 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1740 vec_oprnds0
->quick_push (vec_oprnd
);
1744 vec_oprnds1
->create (1);
1745 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1746 vec_oprnds1
->quick_push (vec_oprnd
);
1751 /* Helper function called by vect_finish_replace_stmt and
1752 vect_finish_stmt_generation. Set the location of the new
1753 statement and create and return a stmt_vec_info for it. */
1755 static stmt_vec_info
1756 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1758 vec_info
*vinfo
= stmt_info
->vinfo
;
1760 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1762 if (dump_enabled_p ())
1763 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1765 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1767 /* While EH edges will generally prevent vectorization, stmt might
1768 e.g. be in a must-not-throw region. Ensure newly created stmts
1769 that could throw are part of the same region. */
1770 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1771 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1772 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1774 return vec_stmt_info
;
1777 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1778 which sets the same scalar result as STMT_INFO did. Create and return a
1779 stmt_vec_info for VEC_STMT. */
1782 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1784 gcc_assert (gimple_get_lhs (stmt_info
->stmt
) == gimple_get_lhs (vec_stmt
));
1786 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt_info
->stmt
);
1787 gsi_replace (&gsi
, vec_stmt
, true);
1789 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1792 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1793 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1796 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1797 gimple_stmt_iterator
*gsi
)
1799 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1801 if (!gsi_end_p (*gsi
)
1802 && gimple_has_mem_ops (vec_stmt
))
1804 gimple
*at_stmt
= gsi_stmt (*gsi
);
1805 tree vuse
= gimple_vuse (at_stmt
);
1806 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1808 tree vdef
= gimple_vdef (at_stmt
);
1809 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1810 /* If we have an SSA vuse and insert a store, update virtual
1811 SSA form to avoid triggering the renamer. Do so only
1812 if we can easily see all uses - which is what almost always
1813 happens with the way vectorized stmts are inserted. */
1814 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1815 && ((is_gimple_assign (vec_stmt
)
1816 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1817 || (is_gimple_call (vec_stmt
)
1818 && !(gimple_call_flags (vec_stmt
)
1819 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1821 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1822 gimple_set_vdef (vec_stmt
, new_vdef
);
1823 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1827 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1828 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1831 /* We want to vectorize a call to combined function CFN with function
1832 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1833 as the types of all inputs. Check whether this is possible using
1834 an internal function, returning its code if so or IFN_LAST if not. */
1837 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1838 tree vectype_out
, tree vectype_in
)
1841 if (internal_fn_p (cfn
))
1842 ifn
= as_internal_fn (cfn
);
1844 ifn
= associated_internal_fn (fndecl
);
1845 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1847 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1848 if (info
.vectorizable
)
1850 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1851 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1852 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1853 OPTIMIZE_FOR_SPEED
))
1861 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1862 gimple_stmt_iterator
*);
1864 /* Check whether a load or store statement in the loop described by
1865 LOOP_VINFO is possible in a fully-masked loop. This is testing
1866 whether the vectorizer pass has the appropriate support, as well as
1867 whether the target does.
1869 VLS_TYPE says whether the statement is a load or store and VECTYPE
1870 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1871 says how the load or store is going to be implemented and GROUP_SIZE
1872 is the number of load or store statements in the containing group.
1873 If the access is a gather load or scatter store, GS_INFO describes
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1880 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1881 vec_load_store_type vls_type
, int group_size
,
1882 vect_memory_access_type memory_access_type
,
1883 gather_scatter_info
*gs_info
)
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type
== VMAT_INVARIANT
)
1889 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1890 machine_mode vecmode
= TYPE_MODE (vectype
);
1891 bool is_load
= (vls_type
== VLS_LOAD
);
1892 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1895 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1896 : !vect_store_lanes_supported (vectype
, group_size
, true))
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1906 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1907 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1911 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1913 internal_fn ifn
= (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE
);
1916 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1917 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1918 gs_info
->memory_type
,
1919 TYPE_SIGN (offset_type
),
1922 if (dump_enabled_p ())
1923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1924 "can't use a fully-masked loop because the"
1925 " target doesn't have an appropriate masked"
1926 " gather load or scatter store instruction.\n");
1927 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1930 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1931 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1935 if (memory_access_type
!= VMAT_CONTIGUOUS
1936 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1938 /* Element X of the data must come from iteration i * VF + X of the
1939 scalar loop. We need more work to support other mappings. */
1940 if (dump_enabled_p ())
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1942 "can't use a fully-masked loop because an access"
1943 " isn't contiguous.\n");
1944 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1948 machine_mode mask_mode
;
1949 if (!(targetm
.vectorize
.get_mask_mode
1950 (GET_MODE_NUNITS (vecmode
),
1951 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1952 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1956 "can't use a fully-masked loop because the target"
1957 " doesn't have the appropriate masked load or"
1959 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1962 /* We might load more scalars than we need for permuting SLP loads.
1963 We checked in get_group_load_store_type that the extra elements
1964 don't leak into a new vector. */
1965 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1966 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1967 unsigned int nvectors
;
1968 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1969 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1974 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1975 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1976 that needs to be applied to all loads and stores in a vectorized loop.
1977 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1979 MASK_TYPE is the type of both masks. If new statements are needed,
1980 insert them before GSI. */
1983 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1984 gimple_stmt_iterator
*gsi
)
1986 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1990 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1991 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1992 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1993 vec_mask
, loop_mask
);
1994 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1998 /* Determine whether we can use a gather load or scatter store to vectorize
1999 strided load or store STMT_INFO by truncating the current offset to a
2000 smaller width. We need to be able to construct an offset vector:
2002 { 0, X, X*2, X*3, ... }
2004 without loss of precision, where X is STMT_INFO's DR_STEP.
2006 Return true if this is possible, describing the gather load or scatter
2007 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2010 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
2011 loop_vec_info loop_vinfo
, bool masked_p
,
2012 gather_scatter_info
*gs_info
)
2014 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2015 data_reference
*dr
= dr_info
->dr
;
2016 tree step
= DR_STEP (dr
);
2017 if (TREE_CODE (step
) != INTEGER_CST
)
2019 /* ??? Perhaps we could use range information here? */
2020 if (dump_enabled_p ())
2021 dump_printf_loc (MSG_NOTE
, vect_location
,
2022 "cannot truncate variable step.\n");
2026 /* Get the number of bits in an element. */
2027 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2028 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2029 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2031 /* Set COUNT to the upper limit on the number of elements - 1.
2032 Start with the maximum vectorization factor. */
2033 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2035 /* Try lowering COUNT to the number of scalar latch iterations. */
2036 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2037 widest_int max_iters
;
2038 if (max_loop_iterations (loop
, &max_iters
)
2039 && max_iters
< count
)
2040 count
= max_iters
.to_shwi ();
2042 /* Try scales of 1 and the element size. */
2043 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
2044 wi::overflow_type overflow
= wi::OVF_NONE
;
2045 for (int i
= 0; i
< 2; ++i
)
2047 int scale
= scales
[i
];
2049 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2052 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2053 in OFFSET_BITS bits. */
2054 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
2057 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2058 if (wi::min_precision (range
, sign
) > element_bits
)
2060 overflow
= wi::OVF_UNKNOWN
;
2064 /* See whether the target supports the operation. */
2065 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2066 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
2067 memory_type
, element_bits
, sign
, scale
,
2068 &gs_info
->ifn
, &gs_info
->element_type
))
2071 tree offset_type
= build_nonstandard_integer_type (element_bits
,
2074 gs_info
->decl
= NULL_TREE
;
2075 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2076 but we don't need to store that here. */
2077 gs_info
->base
= NULL_TREE
;
2078 gs_info
->offset
= fold_convert (offset_type
, step
);
2079 gs_info
->offset_dt
= vect_constant_def
;
2080 gs_info
->offset_vectype
= NULL_TREE
;
2081 gs_info
->scale
= scale
;
2082 gs_info
->memory_type
= memory_type
;
2086 if (overflow
&& dump_enabled_p ())
2087 dump_printf_loc (MSG_NOTE
, vect_location
,
2088 "truncating gather/scatter offset to %d bits"
2089 " might change its value.\n", element_bits
);
2094 /* Return true if we can use gather/scatter internal functions to
2095 vectorize STMT_INFO, which is a grouped or strided load or store.
2096 MASKED_P is true if load or store is conditional. When returning
2097 true, fill in GS_INFO with the information required to perform the
2101 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2102 loop_vec_info loop_vinfo
, bool masked_p
,
2103 gather_scatter_info
*gs_info
)
2105 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2107 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2110 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
2111 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2112 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2113 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
2115 /* Enforced by vect_check_gather_scatter. */
2116 gcc_assert (element_bits
>= offset_bits
);
2118 /* If the elements are wider than the offset, convert the offset to the
2119 same width, without changing its sign. */
2120 if (element_bits
> offset_bits
)
2122 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
2123 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
2124 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_NOTE
, vect_location
,
2129 "using gather/scatter for strided/grouped access,"
2130 " scale = %d\n", gs_info
->scale
);
2135 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2136 elements with a known constant step. Return -1 if that step
2137 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2140 compare_step_with_zero (stmt_vec_info stmt_info
)
2142 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
/* Sign of the constant step of the data reference.  NOTE(review): the
   second operand of the compare (presumably a zero constant) is not
   visible in this extract — confirm against the full source.  */
2143 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2147 /* If the target supports a permute mask that reverses the elements in
2148 a vector of type VECTYPE, return that mask, otherwise return null. */
2151 perm_mask_for_reverse (tree vectype
)
2153 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2155 /* The encoding has a single stepped pattern. */
2156 vec_perm_builder
sel (nunits
, 1, 3);
/* Push NUNITS-1, NUNITS-2, NUNITS-3: three leading elements of the
   element-reversal pattern; the stepped encoding extends it.  */
2157 for (int i
= 0; i
< 3; ++i
)
2158 sel
.quick_push (nunits
- 1 - i
);
2160 vec_perm_indices
indices (sel
, 1, nunits
);
/* Bail out (mask unavailable) if the target cannot do this permute.  */
2161 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2163 return vect_gen_perm_mask_checked (vectype
, indices
);
2166 /* STMT_INFO is either a masked or unconditional store. Return the value
2170 vect_get_store_rhs (stmt_vec_info stmt_info
)
/* Plain assignment store: the stored value is the single RHS.  */
2172 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2174 gcc_assert (gimple_assign_single_p (assign
));
2175 return gimple_assign_rhs1 (assign
);
/* Internal-function store (e.g. masked store): look up which call
   argument carries the stored value.  */
2177 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2179 internal_fn ifn
= gimple_call_internal_fn (call
);
2180 int index
= internal_fn_stored_value_index (ifn
);
2181 gcc_assert (index
>= 0);
2182 return gimple_call_arg (call
, index
);
2187 /* A subroutine of get_load_store_type, with a subset of the same
2188 arguments. Handle the case where STMT_INFO is part of a grouped load
2191 For stores, the statements in the group are all consecutive
2192 and there is no gap at the end. For loads, the statements in the
2193 group might not be consecutive; there can be gaps between statements
2194 as well as at the end. */
2197 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2198 bool masked_p
, vec_load_store_type vls_type
,
2199 vect_memory_access_type
*memory_access_type
,
2200 gather_scatter_info
*gs_info
)
2202 vec_info
*vinfo
= stmt_info
->vinfo
;
2203 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2204 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2205 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2206 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2207 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2208 bool single_element_p
= (stmt_info
== first_stmt_info
2209 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2210 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2211 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2213 /* True if the vectorized statements would access beyond the last
2214 statement in the group. */
2215 bool overrun_p
= false;
2217 /* True if we can cope with such overrun by peeling for gaps, so that
2218 there is at least one final scalar iteration after the vector loop. */
2219 bool can_overrun_p
= (!masked_p
2220 && vls_type
== VLS_LOAD
2224 /* There can only be a gap at the end of the group if the stride is
2225 known at compile time. */
2226 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2228 /* Stores can't yet have gaps. */
2229 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
/* SLP path: strided groups first, then contiguous accesses.  */
2233 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2235 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 separated by the stride, until we have a complete vector.
2237 Fall back to scalar accesses if that isn't possible. */
2238 if (multiple_p (nunits
, group_size
))
2239 *memory_access_type
= VMAT_STRIDED_SLP
;
2241 *memory_access_type
= VMAT_ELEMENTWISE
;
2245 overrun_p
= loop_vinfo
&& gap
!= 0;
2246 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2249 "Grouped store with gaps requires"
2250 " non-consecutive accesses\n");
2253 /* An overrun is fine if the trailing elements are smaller
2254 than the alignment boundary B. Every vector access will
2255 be a multiple of B and so we are guaranteed to access a
2256 non-gap element in the same B-sized block. */
2258 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2259 / vect_get_scalar_dr_size (first_dr_info
)))
2261 if (overrun_p
&& !can_overrun_p
)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2265 "Peeling for outer loop is not supported\n");
2268 *memory_access_type
= VMAT_CONTIGUOUS
;
/* Non-SLP (loop-vect) path: start from elementwise and upgrade to the
   cheapest supported grouped access form.  */
2273 /* We can always handle this case using elementwise accesses,
2274 but see if something more efficient is available. */
2275 *memory_access_type
= VMAT_ELEMENTWISE
;
2277 /* If there is a gap at the end of the group then these optimizations
2278 would access excess elements in the last iteration. */
2279 bool would_overrun_p
= (gap
!= 0);
2280 /* An overrun is fine if the trailing elements are smaller than the
2281 alignment boundary B. Every vector access will be a multiple of B
2282 and so we are guaranteed to access a non-gap element in the
2283 same B-sized block. */
2286 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2287 / vect_get_scalar_dr_size (first_dr_info
)))
2288 would_overrun_p
= false;
2290 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2291 && (can_overrun_p
|| !would_overrun_p
)
2292 && compare_step_with_zero (stmt_info
) > 0)
2294 /* First cope with the degenerate case of a single-element
2296 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2297 *memory_access_type
= VMAT_CONTIGUOUS
;
2299 /* Otherwise try using LOAD/STORE_LANES. */
2300 if (*memory_access_type
== VMAT_ELEMENTWISE
2301 && (vls_type
== VLS_LOAD
2302 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2303 : vect_store_lanes_supported (vectype
, group_size
,
2306 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2307 overrun_p
= would_overrun_p
;
2310 /* If that fails, try using permuting loads. */
2311 if (*memory_access_type
== VMAT_ELEMENTWISE
2312 && (vls_type
== VLS_LOAD
2313 ? vect_grouped_load_supported (vectype
, single_element_p
,
2315 : vect_grouped_store_supported (vectype
, group_size
)))
2317 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2318 overrun_p
= would_overrun_p
;
2322 /* As a last resort, trying using a gather load or scatter store.
2324 ??? Although the code can handle all group sizes correctly,
2325 it probably isn't a win to use separate strided accesses based
2326 on nearby locations. Or, even if it's a win over scalar code,
2327 it might not be a win over vectorizing at a lower VF, if that
2328 allows us to use contiguous accesses. */
2329 if (*memory_access_type
== VMAT_ELEMENTWISE
2332 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2334 *memory_access_type
= VMAT_GATHER_SCATTER
;
2337 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2339 /* STMT is the leader of the group. Check the operands of all the
2340 stmts of the group. */
2341 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2342 while (next_stmt_info
)
2344 tree op
= vect_get_store_rhs (next_stmt_info
);
2345 enum vect_def_type dt
;
2346 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2350 "use not simple.\n");
2353 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* Record that the loop needs an extra scalar epilogue iteration to
   cover the overrun (peeling for gaps).  */
2359 gcc_assert (can_overrun_p
);
2360 if (dump_enabled_p ())
2361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2362 "Data access with gaps requires scalar "
2364 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2370 /* A subroutine of get_load_store_type, with a subset of the same
2371 arguments. Handle the case where STMT_INFO is a load or store that
2372 accesses consecutive elements with a negative step. */
2374 static vect_memory_access_type
2375 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2376 vec_load_store_type vls_type
,
2377 unsigned int ncopies
)
2379 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2380 dr_alignment_support alignment_support_scheme
;
/* More than one copy with a negative step is not handled; fall back
   to elementwise accesses.  */
2384 if (dump_enabled_p ())
2385 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2386 "multiple types with negative step.\n");
2387 return VMAT_ELEMENTWISE
;
2390 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2391 if (alignment_support_scheme
!= dr_aligned
2392 && alignment_support_scheme
!= dr_unaligned_supported
)
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2396 "negative step but alignment required.\n");
2397 return VMAT_ELEMENTWISE
;
/* Storing an invariant value needs no element reversal.  */
2400 if (vls_type
== VLS_STORE_INVARIANT
)
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_NOTE
, vect_location
,
2404 "negative step with invariant source;"
2405 " no permute needed.\n");
2406 return VMAT_CONTIGUOUS_DOWN
;
/* Otherwise we need a target-supported reversal permute.  */
2409 if (!perm_mask_for_reverse (vectype
))
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2413 "negative step and reversing not supported.\n");
2414 return VMAT_ELEMENTWISE
;
2417 return VMAT_CONTIGUOUS_REVERSE
;
2420 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2421 if there is a memory access type that the vectorized form can use,
2422 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2423 or scatters, fill in GS_INFO accordingly.
2425 SLP says whether we're performing SLP rather than loop vectorization.
2426 MASKED_P is true if the statement is conditional on a vectorized mask.
2427 VECTYPE is the vector type that the vectorized statements will use.
2428 NCOPIES is the number of vector statements that will be needed. */
2431 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2432 bool masked_p
, vec_load_store_type vls_type
,
2433 unsigned int ncopies
,
2434 vect_memory_access_type
*memory_access_type
,
2435 gather_scatter_info
*gs_info
)
2437 vec_info
*vinfo
= stmt_info
->vinfo
;
2438 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2439 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Dispatch on the access kind: explicit gather/scatter, grouped,
   strided, or plain contiguous (classified by the step sign).  */
2440 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2442 *memory_access_type
= VMAT_GATHER_SCATTER
;
2443 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2445 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2446 &gs_info
->offset_dt
,
2447 &gs_info
->offset_vectype
))
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2451 "%s index use not simple.\n",
2452 vls_type
== VLS_LOAD
? "gather" : "scatter");
2456 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2458 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2459 vls_type
, memory_access_type
, gs_info
))
2462 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2466 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2468 *memory_access_type
= VMAT_GATHER_SCATTER
;
2470 *memory_access_type
= VMAT_ELEMENTWISE
;
2474 int cmp
= compare_step_with_zero (stmt_info
);
2476 *memory_access_type
= get_negative_load_store_type
2477 (stmt_info
, vectype
, vls_type
, ncopies
);
/* Zero step: only loads can be invariant.  */
2480 gcc_assert (vls_type
== VLS_LOAD
);
2481 *memory_access_type
= VMAT_INVARIANT
;
2484 *memory_access_type
= VMAT_CONTIGUOUS
;
/* Elementwise/strided-SLP accesses need a compile-time-constant number
   of vector elements; reject variable-length vectors here.  */
2487 if ((*memory_access_type
== VMAT_ELEMENTWISE
2488 || *memory_access_type
== VMAT_STRIDED_SLP
)
2489 && !nunits
.is_constant ())
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2493 "Not using elementwise accesses due to variable "
2494 "vectorization factor.\n");
2498 /* FIXME: At the moment the cost model seems to underestimate the
2499 cost of using elementwise accesses. This check preserves the
2500 traditional behavior until that can be fixed. */
2501 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2502 if (!first_stmt_info
)
2503 first_stmt_info
= stmt_info
;
2504 if (*memory_access_type
== VMAT_ELEMENTWISE
2505 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2506 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2507 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2508 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2512 "not falling back to elementwise accesses\n");
2518 /* Return true if boolean argument MASK is suitable for vectorizing
2519 conditional load or store STMT_INFO. When returning true, store the type
2520 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2521 in *MASK_VECTYPE_OUT. */
2524 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2525 vect_def_type
*mask_dt_out
,
2526 tree
*mask_vectype_out
)
/* Reject anything that is not a scalar-boolean SSA name.  */
2528 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2532 "mask argument is not a boolean.\n");
2536 if (TREE_CODE (mask
) != SSA_NAME
)
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2540 "mask argument is not an SSA name.\n");
2544 enum vect_def_type mask_dt
;
2546 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2550 "mask use not simple.\n");
2554 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2556 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2558 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2560 if (dump_enabled_p ())
2561 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2562 "could not find an appropriate vector mask type.\n");
/* The mask vector and the data vector must agree on element count.  */
2566 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2567 TYPE_VECTOR_SUBPARTS (vectype
)))
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* Bug fix: the original had a ',' after the first literal, which made
   the second literal be consumed as the argument of the first %T and
   shifted MASK_VECTYPE/VECTYPE out of position.  Adjacent literals must
   concatenate into a single format string with two %T directives.  */
2571 "vector mask type %T"
2572 " does not match vector data type %T.\n",
2573 mask_vectype
, vectype
);
/* Success: hand back the definition type and the vector mask type.  */
2578 *mask_dt_out
= mask_dt
;
2579 *mask_vectype_out
= mask_vectype
;
2583 /* Return true if stored value RHS is suitable for vectorizing store
2584 statement STMT_INFO. When returning true, store the type of the
2585 definition in *RHS_DT_OUT, the type of the vectorized store value in
2586 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2589 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2590 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2591 vec_load_store_type
*vls_type_out
)
2593 /* In the case this is a store from a constant make sure
2594 native_encode_expr can handle it. */
2595 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2599 "cannot encode constant as a byte sequence.\n")
;
2603 enum vect_def_type rhs_dt
;
2605 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2607 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2609 "use not simple.\n");
/* The stored value's vector type must be compatible with the store's.  */
2613 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2614 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2618 "incompatible vector types.\n");
/* Classify the store: invariant source vs. loop-varying source.  */
2622 *rhs_dt_out
= rhs_dt
;
2623 *rhs_vectype_out
= rhs_vectype
;
2624 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2625 *vls_type_out
= VLS_STORE_INVARIANT
;
2627 *vls_type_out
= VLS_STORE
;
2631 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2632 Note that we support masks with floating-point type, in which case the
2633 floats are interpreted as a bitmask. */
2636 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
/* Scalar integer mask: just the all-ones constant.  */
2638 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2639 return build_int_cst (masktype
, -1);
/* Integer-element vector mask: splat of -1.  */
2640 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2642 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2643 mask
= build_vector_from_val (masktype
, mask
);
2644 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
/* Float-element vector mask: build an all-ones bit pattern and
   reinterpret it as a float value, then splat.  */
2646 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2650 for (int j
= 0; j
< 6; ++j
)
2652 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2653 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2654 mask
= build_vector_from_val (masktype
, mask
);
2655 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2660 /* Build an all-zero merge value of type VECTYPE while vectorizing
2661 STMT_INFO as a gather load. */
2664 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
/* Integer elements: zero constant.  */
2667 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2668 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
/* Float elements: build the zero bit pattern via real_from_target.  */
2669 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2673 for (int j
= 0; j
< 6; ++j
)
2675 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2676 merge
= build_real (TREE_TYPE (vectype
), r
);
/* Splat the scalar zero into a vector and emit the init statement.  */
2680 merge
= build_vector_from_val (vectype
, merge
);
2681 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2684 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2685 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2686 the gather load operation. If the load is conditional, MASK is the
2687 unvectorized condition and MASK_DT is its definition type, otherwise
2691 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2692 gimple_stmt_iterator
*gsi
,
2693 stmt_vec_info
*vec_stmt
,
2694 gather_scatter_info
*gs_info
,
2697 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2698 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2699 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2700 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2701 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2702 edge pe
= loop_preheader_edge (loop
);
/* Relationship between data-vector and offset-vector lane counts.  */
2703 enum { NARROW
, NONE
, WIDEN
} modifier
;
2704 poly_uint64 gather_off_nunits
2705 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
/* Decompose the builtin decl's argument list:
   (src, ptr, idx, mask, scale).  */
2707 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2708 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2709 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2710 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2711 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2712 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2713 tree scaletype
= TREE_VALUE (arglist
);
2714 tree real_masktype
= masktype
;
2715 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2717 || TREE_CODE (masktype
) == INTEGER_TYPE
2718 || types_compatible_p (srctype
, masktype
)));
2719 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2720 masktype
= build_same_sized_truth_vector_type (srctype
);
2722 tree mask_halftype
= masktype
;
2723 tree perm_mask
= NULL_TREE
;
2724 tree mask_perm_mask
= NULL_TREE
;
2725 if (known_eq (nunits
, gather_off_nunits
))
2727 else if (known_eq (nunits
* 2, gather_off_nunits
))
2731 /* Currently widening gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count
= gather_off_nunits
.to_constant ();
2734 vec_perm_builder
sel (count
, count
, 1);
2735 for (int i
= 0; i
< count
; ++i
)
2736 sel
.quick_push (i
| (count
/ 2));
2738 vec_perm_indices
indices (sel
, 1, count
);
2739 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2742 else if (known_eq (nunits
, gather_off_nunits
* 2))
2746 /* Currently narrowing gathers and scatters are only supported for
2747 fixed-length vectors. */
2748 int count
= nunits
.to_constant ();
2749 vec_perm_builder
sel (count
, count
, 1);
2750 sel
.quick_grow (count
);
2751 for (int i
= 0; i
< count
; ++i
)
2752 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2753 vec_perm_indices
indices (sel
, 2, count
);
2754 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2758 if (mask
&& masktype
== real_masktype
)
2760 for (int i
= 0; i
< count
; ++i
)
2761 sel
[i
] = i
| (count
/ 2);
2762 indices
.new_vector (sel
, 2, count
);
2763 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2767 = build_same_sized_truth_vector_type (gs_info
->offset_vectype
);
2772 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2773 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Force the base address to a loop-invariant gimple value in the
   preheader.  */
2775 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2776 if (!is_gimple_min_invariant (ptr
))
2779 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2780 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2781 gcc_assert (!new_bb
);
2784 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2786 tree vec_oprnd0
= NULL_TREE
;
2787 tree vec_mask
= NULL_TREE
;
2788 tree src_op
= NULL_TREE
;
2789 tree mask_op
= NULL_TREE
;
2790 tree prev_res
= NULL_TREE
;
2791 stmt_vec_info prev_stmt_info
= NULL
;
/* Unconditional gather: zero merge value, all-ones mask.  */
2795 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2796 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
/* Emit NCOPIES gather calls, permuting offsets/masks as dictated by
   MODIFIER.  */
2799 for (int j
= 0; j
< ncopies
; ++j
)
2802 if (modifier
== WIDEN
&& (j
& 1))
2803 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2804 perm_mask
, stmt_info
, gsi
);
2807 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2809 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
/* View-convert the offset vector to the builtin's index type.  */
2812 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2815 TYPE_VECTOR_SUBPARTS (idxtype
)));
2816 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2817 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2818 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2819 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2825 if (mask_perm_mask
&& (j
& 1))
2826 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2827 mask_perm_mask
, stmt_info
, gsi
);
2831 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2832 else if (modifier
!= NARROW
|| (j
& 1) == 0)
2833 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2837 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2839 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2840 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2841 gcc_assert (known_eq (sub1
, sub2
));
2842 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2843 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2845 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2846 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* Narrowing with a boolean mask: unpack the lo/hi half per copy.  */
2850 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2852 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2854 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2855 : VEC_UNPACK_LO_EXPR
,
2857 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* Convert a boolean mask to the integer mask type the builtin wants.  */
2863 tree mask_arg
= mask_op
;
2864 if (masktype
!= real_masktype
)
2866 tree utype
, optype
= TREE_TYPE (mask_op
);
2867 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2868 utype
= real_masktype
;
2870 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2871 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2872 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2874 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2875 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2877 if (!useless_type_conversion_p (real_masktype
, utype
))
2879 gcc_assert (TYPE_PRECISION (utype
)
2880 <= TYPE_PRECISION (real_masktype
));
2881 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2882 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2883 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2886 src_op
= build_zero_cst (srctype
);
2888 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2891 stmt_vec_info new_stmt_info
;
/* If the builtin's return type differs from VECTYPE, view-convert the
   result into the destination vector type.  */
2892 if (!useless_type_conversion_p (vectype
, rettype
))
2894 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2895 TYPE_VECTOR_SUBPARTS (rettype
)));
2896 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2897 gimple_call_set_lhs (new_call
, op
);
2898 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2899 var
= make_ssa_name (vec_dest
);
2900 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2901 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2903 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2907 var
= make_ssa_name (vec_dest
, new_call
);
2908 gimple_call_set_lhs (new_call
, var
);
2910 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
/* For NARROW, combine two half-width results with the permute mask.  */
2913 if (modifier
== NARROW
)
2920 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2922 new_stmt_info
= loop_vinfo
->lookup_def (var
);
/* Chain the copies together via STMT_VINFO_RELATED_STMT.  */
2925 if (prev_stmt_info
== NULL
)
2926 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2928 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2929 prev_stmt_info
= new_stmt_info
;
2933 /* Prepare the base and offset in GS_INFO for vectorization.
2934 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2935 to the vectorized offset argument for the first copy of STMT_INFO.
2936 STMT_INFO is the statement described by GS_INFO and LOOP is the
2940 vect_get_gather_scatter_ops (struct loop
*loop
, stmt_vec_info stmt_info
,
2941 gather_scatter_info
*gs_info
,
2942 tree
*dataref_ptr
, tree
*vec_offset
)
/* Materialize the invariant base address in the loop preheader.  */
2944 gimple_seq stmts
= NULL
;
2945 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2949 edge pe
= loop_preheader_edge (loop
);
2950 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2951 gcc_assert (!new_bb
);
/* Build the vectorized offset operand for the first copy.  */
2953 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2954 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2955 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2959 /* Prepare to implement a grouped or strided load or store using
2960 the gather load or scatter store operation described by GS_INFO.
2961 STMT_INFO is the load or store statement.
2963 Set *DATAREF_BUMP to the amount that should be added to the base
2964 address after each copy of the vectorized statement. Set *VEC_OFFSET
2965 to an invariant offset vector in which element I has the value
2966 I * DR_STEP / SCALE. */
2969 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2970 loop_vec_info loop_vinfo
,
2971 gather_scatter_info
*gs_info
,
2972 tree
*dataref_bump
, tree
*vec_offset
)
2974 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2975 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2976 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* DATAREF_BUMP = DR_STEP * number of vector lanes, emitted in the
   preheader.  */
2979 tree bump
= size_binop (MULT_EXPR
,
2980 fold_convert (sizetype
, DR_STEP (dr
)),
2981 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2982 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2984 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2986 /* The offset given in GS_INFO can have pointer type, so use the element
2987 type of the vector instead. */
2988 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2989 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2990 offset_type
= TREE_TYPE (offset_vectype
);
2992 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2993 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
2994 ssize_int (gs_info
->scale
));
2995 step
= fold_convert (offset_type
, step
);
2996 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2998 /* Create {0, X, X*2, X*3, ...}. */
2999 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
3000 build_zero_cst (offset_type
), step
);
3002 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
3005 /* Return the amount that should be added to a vector pointer to move
3006 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3007 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3011 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
3012 vect_memory_access_type memory_access_type
)
/* Invariant accesses never move the pointer.  */
3014 if (memory_access_type
== VMAT_INVARIANT
)
3015 return size_zero_node
;
/* Step by the aggregate size; negate it for a descending access.  */
3017 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3018 tree step
= vect_dr_behavior (dr_info
)->step
;
3019 if (tree_int_cst_sgn (step
) == -1)
3020 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3024 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3027 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3028 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3029 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3032 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3033 vec_info
*vinfo
= stmt_info
->vinfo
;
3034 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3037 op
= gimple_call_arg (stmt
, 0);
3038 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3039 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3041 /* Multiple types in SLP are handled by creating the appropriate number of
3042 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3047 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3049 gcc_assert (ncopies
>= 1);
/* bswap is implemented as a byte permutation on a char vector of the
   same size; check that the target supports that permute.  */
3051 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3055 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3056 unsigned word_bytes
;
3057 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3060 /* The encoding uses one stepped pattern for each byte in the word. */
3061 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3062 for (unsigned i
= 0; i
< 3; ++i
)
3063 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3064 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3066 vec_perm_indices
indices (elts
, 1, num_bytes
);
3067 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
/* Analysis phase: record costs and the vectorization type, no code.  */
3072 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3073 DUMP_VECT_SCOPE ("vectorizable_bswap");
3076 record_stmt_cost (cost_vec
,
3077 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3078 record_stmt_cost (cost_vec
,
3079 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
/* Transform phase: VIEW_CONVERT to chars, VEC_PERM, convert back.  */
3084 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3087 vec
<tree
> vec_oprnds
= vNULL
;
3088 stmt_vec_info new_stmt_info
= NULL
;
3089 stmt_vec_info prev_stmt_info
= NULL
;
3090 for (unsigned j
= 0; j
< ncopies
; j
++)
3094 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
3096 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
3098 /* Arguments are ready. create the new vector stmt. */
3101 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3104 tree tem
= make_ssa_name (char_vectype
);
3105 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3106 char_vectype
, vop
));
3107 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3108 tree tem2
= make_ssa_name (char_vectype
);
3109 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3110 tem
, tem
, bswap_vconst
);
3111 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3112 tem
= make_ssa_name (vectype
);
3113 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3116 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3118 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
/* Chain the copies for the non-SLP case.  */
3125 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3127 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3129 prev_stmt_info
= new_stmt_info
;
3132 vec_oprnds
.release ();
3136 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3137 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3138 in a single step. On success, store the binary pack code in
3142 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3143 tree_code
*convert_code
)
3145 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3146 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3150 int multi_step_cvt
= 0;
3151 auto_vec
<tree
, 8> interm_types
;
3152 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3153 &code
, &multi_step_cvt
,
3158 *convert_code
= code
;
3162 /* Function vectorizable_call.
3164 Check if STMT_INFO performs a function call that can be vectorized.
3165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3167 Return true if STMT_INFO is vectorizable in this way. */
3170 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3171 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3172 stmt_vector_for_cost
*cost_vec
)
3178 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3179 stmt_vec_info prev_stmt_info
;
3180 tree vectype_out
, vectype_in
;
3181 poly_uint64 nunits_in
;
3182 poly_uint64 nunits_out
;
3183 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3184 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3185 vec_info
*vinfo
= stmt_info
->vinfo
;
3186 tree fndecl
, new_temp
, rhs_type
;
3187 enum vect_def_type dt
[4]
3188 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3189 vect_unknown_def_type
};
3190 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3191 int ndts
= ARRAY_SIZE (dt
);
3193 auto_vec
<tree
, 8> vargs
;
3194 auto_vec
<tree
, 8> orig_vargs
;
3195 enum { NARROW
, NONE
, WIDEN
} modifier
;
3199 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3202 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3206 /* Is STMT_INFO a vectorizable call? */
3207 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3211 if (gimple_call_internal_p (stmt
)
3212 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3213 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3214 /* Handled by vectorizable_load and vectorizable_store. */
3217 if (gimple_call_lhs (stmt
) == NULL_TREE
3218 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3221 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3223 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3225 /* Process function arguments. */
3226 rhs_type
= NULL_TREE
;
3227 vectype_in
= NULL_TREE
;
3228 nargs
= gimple_call_num_args (stmt
);
3230 /* Bail out if the function has more than three arguments, we do not have
3231 interesting builtin functions to vectorize with more than two arguments
3232 except for fma. No arguments is also not good. */
3233 if (nargs
== 0 || nargs
> 4)
3236 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3237 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3238 if (cfn
== CFN_GOMP_SIMD_LANE
)
3241 rhs_type
= unsigned_type_node
;
3245 if (internal_fn_p (cfn
))
3246 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3248 for (i
= 0; i
< nargs
; i
++)
3250 op
= gimple_call_arg (stmt
, i
);
3251 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &vectypes
[i
]))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3255 "use not simple.\n");
3259 /* Skip the mask argument to an internal function. This operand
3260 has been converted via a pattern if necessary. */
3261 if ((int) i
== mask_opno
)
3264 /* We can only handle calls with arguments of the same type. */
3266 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3270 "argument types differ.\n");
3274 rhs_type
= TREE_TYPE (op
);
3277 vectype_in
= vectypes
[i
];
3278 else if (vectypes
[i
]
3279 && vectypes
[i
] != vectype_in
)
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3283 "argument vector types differ.\n");
3287 /* If all arguments are external or constant defs use a vector type with
3288 the same size as the output vector type. */
3290 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3292 gcc_assert (vectype_in
);
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3297 "no vectype for scalar type %T\n", rhs_type
);
3303 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3304 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3305 if (known_eq (nunits_in
* 2, nunits_out
))
3307 else if (known_eq (nunits_out
, nunits_in
))
3309 else if (known_eq (nunits_out
* 2, nunits_in
))
3314 /* We only handle functions that do not read or clobber memory. */
3315 if (gimple_vuse (stmt
))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3319 "function reads from or writes to memory.\n");
3323 /* For now, we only vectorize functions if a target specific builtin
3324 is available. TODO -- in some cases, it might be profitable to
3325 insert the calls for pieces of the vector, in order to be able
3326 to vectorize other operations in the loop. */
3328 internal_fn ifn
= IFN_LAST
;
3329 tree callee
= gimple_call_fndecl (stmt
);
3331 /* First try using an internal function. */
3332 tree_code convert_code
= ERROR_MARK
;
3334 && (modifier
== NONE
3335 || (modifier
== NARROW
3336 && simple_integer_narrowing (vectype_out
, vectype_in
,
3338 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3341 /* If that fails, try asking for a target-specific built-in function. */
3342 if (ifn
== IFN_LAST
)
3344 if (cfn
!= CFN_LAST
)
3345 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3346 (cfn
, vectype_out
, vectype_in
);
3348 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3349 (callee
, vectype_out
, vectype_in
);
3352 if (ifn
== IFN_LAST
&& !fndecl
)
3354 if (cfn
== CFN_GOMP_SIMD_LANE
3357 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3358 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3359 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3360 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3362 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3363 { 0, 1, 2, ... vf - 1 } vector. */
3364 gcc_assert (nargs
== 0);
3366 else if (modifier
== NONE
3367 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3368 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3369 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3370 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3371 vectype_in
, cost_vec
);
3374 if (dump_enabled_p ())
3375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3376 "function is not vectorizable.\n");
3383 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3384 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3386 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3388 /* Sanity check: make sure that at least one copy of the vectorized stmt
3389 needs to be generated. */
3390 gcc_assert (ncopies
>= 1);
3392 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3393 if (!vec_stmt
) /* transformation not required. */
3395 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3396 DUMP_VECT_SCOPE ("vectorizable_call");
3397 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3398 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3399 record_stmt_cost (cost_vec
, ncopies
/ 2,
3400 vec_promote_demote
, stmt_info
, 0, vect_body
);
3402 if (loop_vinfo
&& mask_opno
>= 0)
3404 unsigned int nvectors
= (slp_node
3405 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3407 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
);
3414 if (dump_enabled_p ())
3415 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3418 scalar_dest
= gimple_call_lhs (stmt
);
3419 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3421 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3423 stmt_vec_info new_stmt_info
= NULL
;
3424 prev_stmt_info
= NULL
;
3425 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3427 tree prev_res
= NULL_TREE
;
3428 vargs
.safe_grow (nargs
);
3429 orig_vargs
.safe_grow (nargs
);
3430 for (j
= 0; j
< ncopies
; ++j
)
3432 /* Build argument list for the vectorized call. */
3435 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3436 vec
<tree
> vec_oprnds0
;
3438 for (i
= 0; i
< nargs
; i
++)
3439 vargs
[i
] = gimple_call_arg (stmt
, i
);
3440 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3441 vec_oprnds0
= vec_defs
[0];
3443 /* Arguments are ready. Create the new vector stmt. */
3444 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3447 for (k
= 0; k
< nargs
; k
++)
3449 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3450 vargs
[k
] = vec_oprndsk
[i
];
3452 if (modifier
== NARROW
)
3454 /* We don't define any narrowing conditional functions
3456 gcc_assert (mask_opno
< 0);
3457 tree half_res
= make_ssa_name (vectype_in
);
3459 = gimple_build_call_internal_vec (ifn
, vargs
);
3460 gimple_call_set_lhs (call
, half_res
);
3461 gimple_call_set_nothrow (call
, true);
3463 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3466 prev_res
= half_res
;
3469 new_temp
= make_ssa_name (vec_dest
);
3471 = gimple_build_assign (new_temp
, convert_code
,
3472 prev_res
, half_res
);
3474 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3479 if (mask_opno
>= 0 && masked_loop_p
)
3481 unsigned int vec_num
= vec_oprnds0
.length ();
3482 /* Always true for SLP. */
3483 gcc_assert (ncopies
== 1);
3484 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3486 vargs
[mask_opno
] = prepare_load_store_mask
3487 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3491 if (ifn
!= IFN_LAST
)
3492 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3494 call
= gimple_build_call_vec (fndecl
, vargs
);
3495 new_temp
= make_ssa_name (vec_dest
, call
);
3496 gimple_call_set_lhs (call
, new_temp
);
3497 gimple_call_set_nothrow (call
, true);
3499 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3501 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3504 for (i
= 0; i
< nargs
; i
++)
3506 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3507 vec_oprndsi
.release ();
3512 if (mask_opno
>= 0 && !vectypes
[mask_opno
])
3514 gcc_assert (modifier
!= WIDEN
);
3516 = build_same_sized_truth_vector_type (vectype_in
);
3519 for (i
= 0; i
< nargs
; i
++)
3521 op
= gimple_call_arg (stmt
, i
);
3524 = vect_get_vec_def_for_operand (op
, stmt_info
, vectypes
[i
]);
3527 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3529 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3532 if (mask_opno
>= 0 && masked_loop_p
)
3534 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3537 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3538 vargs
[mask_opno
], gsi
);
3541 if (cfn
== CFN_GOMP_SIMD_LANE
)
3543 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3545 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3546 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3547 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3548 new_temp
= make_ssa_name (vec_dest
);
3549 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3551 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3553 else if (modifier
== NARROW
)
3555 /* We don't define any narrowing conditional functions at
3557 gcc_assert (mask_opno
< 0);
3558 tree half_res
= make_ssa_name (vectype_in
);
3559 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3560 gimple_call_set_lhs (call
, half_res
);
3561 gimple_call_set_nothrow (call
, true);
3563 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3566 prev_res
= half_res
;
3569 new_temp
= make_ssa_name (vec_dest
);
3570 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3571 prev_res
, half_res
);
3573 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3578 if (ifn
!= IFN_LAST
)
3579 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3581 call
= gimple_build_call_vec (fndecl
, vargs
);
3582 new_temp
= make_ssa_name (vec_dest
, call
);
3583 gimple_call_set_lhs (call
, new_temp
);
3584 gimple_call_set_nothrow (call
, true);
3586 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3589 if (j
== (modifier
== NARROW
? 1 : 0))
3590 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3592 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3594 prev_stmt_info
= new_stmt_info
;
3597 else if (modifier
== NARROW
)
3599 /* We don't define any narrowing conditional functions at present. */
3600 gcc_assert (mask_opno
< 0);
3601 for (j
= 0; j
< ncopies
; ++j
)
3603 /* Build argument list for the vectorized call. */
3605 vargs
.create (nargs
* 2);
3611 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3612 vec
<tree
> vec_oprnds0
;
3614 for (i
= 0; i
< nargs
; i
++)
3615 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3616 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3617 vec_oprnds0
= vec_defs
[0];
3619 /* Arguments are ready. Create the new vector stmt. */
3620 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3624 for (k
= 0; k
< nargs
; k
++)
3626 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3627 vargs
.quick_push (vec_oprndsk
[i
]);
3628 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3631 if (ifn
!= IFN_LAST
)
3632 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3634 call
= gimple_build_call_vec (fndecl
, vargs
);
3635 new_temp
= make_ssa_name (vec_dest
, call
);
3636 gimple_call_set_lhs (call
, new_temp
);
3637 gimple_call_set_nothrow (call
, true);
3639 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3640 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3643 for (i
= 0; i
< nargs
; i
++)
3645 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3646 vec_oprndsi
.release ();
3651 for (i
= 0; i
< nargs
; i
++)
3653 op
= gimple_call_arg (stmt
, i
);
3657 = vect_get_vec_def_for_operand (op
, stmt_info
,
3660 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3664 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3667 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3669 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3672 vargs
.quick_push (vec_oprnd0
);
3673 vargs
.quick_push (vec_oprnd1
);
3676 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3677 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3678 gimple_call_set_lhs (new_stmt
, new_temp
);
3680 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3683 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3685 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3687 prev_stmt_info
= new_stmt_info
;
3690 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3693 /* No current target implements this case. */
3698 /* The call in STMT might prevent it from being removed in dce.
3699 We however cannot remove it here, due to the way the ssa name
3700 it defines is mapped to the new definition. So just replace
3701 rhs of the statement with something harmless. */
3706 stmt_info
= vect_orig_stmt (stmt_info
);
3707 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3710 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3711 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3717 struct simd_call_arg_info
3721 HOST_WIDE_INT linear_step
;
3722 enum vect_def_type dt
;
3724 bool simd_lane_linear
;
3727 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3728 is linear within simd lane (but not within whole loop), note it in
3732 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3733 struct simd_call_arg_info
*arginfo
)
3735 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3737 if (!is_gimple_assign (def_stmt
)
3738 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3739 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3742 tree base
= gimple_assign_rhs1 (def_stmt
);
3743 HOST_WIDE_INT linear_step
= 0;
3744 tree v
= gimple_assign_rhs2 (def_stmt
);
3745 while (TREE_CODE (v
) == SSA_NAME
)
3748 def_stmt
= SSA_NAME_DEF_STMT (v
);
3749 if (is_gimple_assign (def_stmt
))
3750 switch (gimple_assign_rhs_code (def_stmt
))
3753 t
= gimple_assign_rhs2 (def_stmt
);
3754 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3756 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3757 v
= gimple_assign_rhs1 (def_stmt
);
3760 t
= gimple_assign_rhs2 (def_stmt
);
3761 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3763 linear_step
= tree_to_shwi (t
);
3764 v
= gimple_assign_rhs1 (def_stmt
);
3767 t
= gimple_assign_rhs1 (def_stmt
);
3768 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3769 || (TYPE_PRECISION (TREE_TYPE (v
))
3770 < TYPE_PRECISION (TREE_TYPE (t
))))
3779 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3781 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3782 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3787 arginfo
->linear_step
= linear_step
;
3789 arginfo
->simd_lane_linear
= true;
3795 /* Return the number of elements in vector type VECTYPE, which is associated
3796 with a SIMD clone. At present these vectors always have a constant
3799 static unsigned HOST_WIDE_INT
3800 simd_clone_subparts (tree vectype
)
3802 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3805 /* Function vectorizable_simd_clone_call.
3807 Check if STMT_INFO performs a function call that can be vectorized
3808 by calling a simd clone of the function.
3809 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3810 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3811 Return true if STMT_INFO is vectorizable in this way. */
3814 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3815 gimple_stmt_iterator
*gsi
,
3816 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3817 stmt_vector_for_cost
*)
3822 tree vec_oprnd0
= NULL_TREE
;
3823 stmt_vec_info prev_stmt_info
;
3825 unsigned int nunits
;
3826 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3827 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3828 vec_info
*vinfo
= stmt_info
->vinfo
;
3829 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3830 tree fndecl
, new_temp
;
3832 auto_vec
<simd_call_arg_info
> arginfo
;
3833 vec
<tree
> vargs
= vNULL
;
3835 tree lhs
, rtype
, ratype
;
3836 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3838 /* Is STMT a vectorizable call? */
3839 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3843 fndecl
= gimple_call_fndecl (stmt
);
3844 if (fndecl
== NULL_TREE
)
3847 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3848 if (node
== NULL
|| node
->simd_clones
== NULL
)
3851 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3854 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3858 if (gimple_call_lhs (stmt
)
3859 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3862 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3864 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3866 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3873 /* Process function arguments. */
3874 nargs
= gimple_call_num_args (stmt
);
3876 /* Bail out if the function has zero arguments. */
3880 arginfo
.reserve (nargs
, true);
3882 for (i
= 0; i
< nargs
; i
++)
3884 simd_call_arg_info thisarginfo
;
3887 thisarginfo
.linear_step
= 0;
3888 thisarginfo
.align
= 0;
3889 thisarginfo
.op
= NULL_TREE
;
3890 thisarginfo
.simd_lane_linear
= false;
3892 op
= gimple_call_arg (stmt
, i
);
3893 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3894 &thisarginfo
.vectype
)
3895 || thisarginfo
.dt
== vect_uninitialized_def
)
3897 if (dump_enabled_p ())
3898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3899 "use not simple.\n");
3903 if (thisarginfo
.dt
== vect_constant_def
3904 || thisarginfo
.dt
== vect_external_def
)
3905 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3907 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3909 /* For linear arguments, the analyze phase should have saved
3910 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3911 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3912 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3914 gcc_assert (vec_stmt
);
3915 thisarginfo
.linear_step
3916 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3918 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3919 thisarginfo
.simd_lane_linear
3920 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3921 == boolean_true_node
);
3922 /* If loop has been peeled for alignment, we need to adjust it. */
3923 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3924 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3925 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3927 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3928 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3929 tree opt
= TREE_TYPE (thisarginfo
.op
);
3930 bias
= fold_convert (TREE_TYPE (step
), bias
);
3931 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3933 = fold_build2 (POINTER_TYPE_P (opt
)
3934 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3935 thisarginfo
.op
, bias
);
3939 && thisarginfo
.dt
!= vect_constant_def
3940 && thisarginfo
.dt
!= vect_external_def
3942 && TREE_CODE (op
) == SSA_NAME
3943 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3945 && tree_fits_shwi_p (iv
.step
))
3947 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3948 thisarginfo
.op
= iv
.base
;
3950 else if ((thisarginfo
.dt
== vect_constant_def
3951 || thisarginfo
.dt
== vect_external_def
)
3952 && POINTER_TYPE_P (TREE_TYPE (op
)))
3953 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3954 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3956 if (POINTER_TYPE_P (TREE_TYPE (op
))
3957 && !thisarginfo
.linear_step
3959 && thisarginfo
.dt
!= vect_constant_def
3960 && thisarginfo
.dt
!= vect_external_def
3963 && TREE_CODE (op
) == SSA_NAME
)
3964 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3966 arginfo
.quick_push (thisarginfo
);
3969 unsigned HOST_WIDE_INT vf
;
3970 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3972 if (dump_enabled_p ())
3973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3974 "not considering SIMD clones; not yet supported"
3975 " for variable-width vectors.\n");
3979 unsigned int badness
= 0;
3980 struct cgraph_node
*bestn
= NULL
;
3981 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3982 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3984 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3985 n
= n
->simdclone
->next_clone
)
3987 unsigned int this_badness
= 0;
3988 if (n
->simdclone
->simdlen
> vf
3989 || n
->simdclone
->nargs
!= nargs
)
3991 if (n
->simdclone
->simdlen
< vf
)
3992 this_badness
+= (exact_log2 (vf
)
3993 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3994 if (n
->simdclone
->inbranch
)
3995 this_badness
+= 2048;
3996 int target_badness
= targetm
.simd_clone
.usable (n
);
3997 if (target_badness
< 0)
3999 this_badness
+= target_badness
* 512;
4000 /* FORNOW: Have to add code to add the mask argument. */
4001 if (n
->simdclone
->inbranch
)
4003 for (i
= 0; i
< nargs
; i
++)
4005 switch (n
->simdclone
->args
[i
].arg_type
)
4007 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4008 if (!useless_type_conversion_p
4009 (n
->simdclone
->args
[i
].orig_type
,
4010 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4012 else if (arginfo
[i
].dt
== vect_constant_def
4013 || arginfo
[i
].dt
== vect_external_def
4014 || arginfo
[i
].linear_step
)
4017 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4018 if (arginfo
[i
].dt
!= vect_constant_def
4019 && arginfo
[i
].dt
!= vect_external_def
)
4022 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4023 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4024 if (arginfo
[i
].dt
== vect_constant_def
4025 || arginfo
[i
].dt
== vect_external_def
4026 || (arginfo
[i
].linear_step
4027 != n
->simdclone
->args
[i
].linear_step
))
4030 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4031 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4032 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4033 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4034 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4035 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4039 case SIMD_CLONE_ARG_TYPE_MASK
:
4042 if (i
== (size_t) -1)
4044 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4049 if (arginfo
[i
].align
)
4050 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4051 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4053 if (i
== (size_t) -1)
4055 if (bestn
== NULL
|| this_badness
< badness
)
4058 badness
= this_badness
;
4065 for (i
= 0; i
< nargs
; i
++)
4066 if ((arginfo
[i
].dt
== vect_constant_def
4067 || arginfo
[i
].dt
== vect_external_def
)
4068 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4071 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
4073 if (arginfo
[i
].vectype
== NULL
4074 || (simd_clone_subparts (arginfo
[i
].vectype
)
4075 > bestn
->simdclone
->simdlen
))
4079 fndecl
= bestn
->decl
;
4080 nunits
= bestn
->simdclone
->simdlen
;
4081 ncopies
= vf
/ nunits
;
4083 /* If the function isn't const, only allow it in simd loops where user
4084 has asserted that at least nunits consecutive iterations can be
4085 performed using SIMD instructions. */
4086 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
4087 && gimple_vuse (stmt
))
4090 /* Sanity check: make sure that at least one copy of the vectorized stmt
4091 needs to be generated. */
4092 gcc_assert (ncopies
>= 1);
4094 if (!vec_stmt
) /* transformation not required. */
4096 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4097 for (i
= 0; i
< nargs
; i
++)
4098 if ((bestn
->simdclone
->args
[i
].arg_type
4099 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4100 || (bestn
->simdclone
->args
[i
].arg_type
4101 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4103 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4105 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4106 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4107 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4108 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4109 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4110 tree sll
= arginfo
[i
].simd_lane_linear
4111 ? boolean_true_node
: boolean_false_node
;
4112 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4114 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4115 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4116 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4122 if (dump_enabled_p ())
4123 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4126 scalar_dest
= gimple_call_lhs (stmt
);
4127 vec_dest
= NULL_TREE
;
4132 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4133 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4134 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4137 rtype
= TREE_TYPE (ratype
);
4141 prev_stmt_info
= NULL
;
4142 for (j
= 0; j
< ncopies
; ++j
)
4144 /* Build argument list for the vectorized call. */
4146 vargs
.create (nargs
);
4150 for (i
= 0; i
< nargs
; i
++)
4152 unsigned int k
, l
, m
, o
;
4154 op
= gimple_call_arg (stmt
, i
);
4155 switch (bestn
->simdclone
->args
[i
].arg_type
)
4157 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4158 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4159 o
= nunits
/ simd_clone_subparts (atype
);
4160 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4162 if (simd_clone_subparts (atype
)
4163 < simd_clone_subparts (arginfo
[i
].vectype
))
4165 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4166 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4167 / simd_clone_subparts (atype
));
4168 gcc_assert ((k
& (k
- 1)) == 0);
4171 = vect_get_vec_def_for_operand (op
, stmt_info
);
4174 vec_oprnd0
= arginfo
[i
].op
;
4175 if ((m
& (k
- 1)) == 0)
4177 = vect_get_vec_def_for_stmt_copy (vinfo
,
4180 arginfo
[i
].op
= vec_oprnd0
;
4182 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4184 bitsize_int ((m
& (k
- 1)) * prec
));
4186 = gimple_build_assign (make_ssa_name (atype
),
4188 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4189 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4193 k
= (simd_clone_subparts (atype
)
4194 / simd_clone_subparts (arginfo
[i
].vectype
));
4195 gcc_assert ((k
& (k
- 1)) == 0);
4196 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4198 vec_alloc (ctor_elts
, k
);
4201 for (l
= 0; l
< k
; l
++)
4203 if (m
== 0 && l
== 0)
4205 = vect_get_vec_def_for_operand (op
, stmt_info
);
4208 = vect_get_vec_def_for_stmt_copy (vinfo
,
4210 arginfo
[i
].op
= vec_oprnd0
;
4213 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4217 vargs
.safe_push (vec_oprnd0
);
4220 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4222 = gimple_build_assign (make_ssa_name (atype
),
4224 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4226 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4231 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4232 vargs
.safe_push (op
);
4234 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4235 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4240 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4245 edge pe
= loop_preheader_edge (loop
);
4246 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4247 gcc_assert (!new_bb
);
4249 if (arginfo
[i
].simd_lane_linear
)
4251 vargs
.safe_push (arginfo
[i
].op
);
4254 tree phi_res
= copy_ssa_name (op
);
4255 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4256 loop_vinfo
->add_stmt (new_phi
);
4257 add_phi_arg (new_phi
, arginfo
[i
].op
,
4258 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4260 = POINTER_TYPE_P (TREE_TYPE (op
))
4261 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4262 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4263 ? sizetype
: TREE_TYPE (op
);
4265 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4267 tree tcst
= wide_int_to_tree (type
, cst
);
4268 tree phi_arg
= copy_ssa_name (op
);
4270 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4271 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4272 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4273 loop_vinfo
->add_stmt (new_stmt
);
4274 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4276 arginfo
[i
].op
= phi_res
;
4277 vargs
.safe_push (phi_res
);
4282 = POINTER_TYPE_P (TREE_TYPE (op
))
4283 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4284 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4285 ? sizetype
: TREE_TYPE (op
);
4287 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4289 tree tcst
= wide_int_to_tree (type
, cst
);
4290 new_temp
= make_ssa_name (TREE_TYPE (op
));
4292 = gimple_build_assign (new_temp
, code
,
4293 arginfo
[i
].op
, tcst
);
4294 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4295 vargs
.safe_push (new_temp
);
4298 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4299 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4300 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4301 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4302 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4303 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4309 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4312 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4314 new_temp
= create_tmp_var (ratype
);
4315 else if (simd_clone_subparts (vectype
)
4316 == simd_clone_subparts (rtype
))
4317 new_temp
= make_ssa_name (vec_dest
, new_call
);
4319 new_temp
= make_ssa_name (rtype
, new_call
);
4320 gimple_call_set_lhs (new_call
, new_temp
);
4322 stmt_vec_info new_stmt_info
4323 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4327 if (simd_clone_subparts (vectype
) < nunits
)
4330 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4331 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4332 k
= nunits
/ simd_clone_subparts (vectype
);
4333 gcc_assert ((k
& (k
- 1)) == 0);
4334 for (l
= 0; l
< k
; l
++)
4339 t
= build_fold_addr_expr (new_temp
);
4340 t
= build2 (MEM_REF
, vectype
, t
,
4341 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4344 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4345 bitsize_int (prec
), bitsize_int (l
* prec
));
4347 = gimple_build_assign (make_ssa_name (vectype
), t
);
4349 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4351 if (j
== 0 && l
== 0)
4352 STMT_VINFO_VEC_STMT (stmt_info
)
4353 = *vec_stmt
= new_stmt_info
;
4355 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4357 prev_stmt_info
= new_stmt_info
;
4361 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4364 else if (simd_clone_subparts (vectype
) > nunits
)
4366 unsigned int k
= (simd_clone_subparts (vectype
)
4367 / simd_clone_subparts (rtype
));
4368 gcc_assert ((k
& (k
- 1)) == 0);
4369 if ((j
& (k
- 1)) == 0)
4370 vec_alloc (ret_ctor_elts
, k
);
4373 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4374 for (m
= 0; m
< o
; m
++)
4376 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4377 size_int (m
), NULL_TREE
, NULL_TREE
);
4379 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4381 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4383 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4384 gimple_assign_lhs (new_stmt
));
4386 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4389 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4390 if ((j
& (k
- 1)) != k
- 1)
4392 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4394 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4396 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4398 if ((unsigned) j
== k
- 1)
4399 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4401 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4403 prev_stmt_info
= new_stmt_info
;
4408 tree t
= build_fold_addr_expr (new_temp
);
4409 t
= build2 (MEM_REF
, vectype
, t
,
4410 build_int_cst (TREE_TYPE (t
), 0));
4412 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4414 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4415 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4420 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4422 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4424 prev_stmt_info
= new_stmt_info
;
4429 /* The call in STMT might prevent it from being removed in dce.
4430 We however cannot remove it here, due to the way the ssa name
4431 it defines is mapped to the new definition. So just replace
4432 rhs of the statement with something harmless. */
4440 type
= TREE_TYPE (scalar_dest
);
4441 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4442 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4445 new_stmt
= gimple_build_nop ();
4446 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4447 unlink_stmt_vdef (stmt
);
4453 /* Function vect_gen_widened_results_half
4455 Create a vector stmt whose code, type, number of arguments, and result
4456 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4457 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4458 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4459 needs to be created (DECL is a function-decl of a target-builtin).
4460 STMT_INFO is the original scalar stmt that we are vectorizing. */
/* NOTE(review): this dump has dropped source lines (the embedded original
   line numbers jump, e.g. 4460 -> 4463, 4477 -> 4479); local declarations,
   the else keywords, braces and the trailing return are not visible here.
   Verify any reading of this function against the full tree-vect-stmts.c.  */
4463 vect_gen_widened_results_half (enum tree_code code
,
4465 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4466 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4467 stmt_vec_info stmt_info
)
/* CALL_EXPR means the widening is performed via a target builtin: build a
   call to the builtin decl with one or two arguments depending on OP_TYPE.  */
4472 /* Generate half of the widened result: */
4473 if (code
== CALL_EXPR
)
4475 /* Target specific support */
4476 if (op_type
== binary_op
)
4477 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4479 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4480 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4481 gimple_call_set_lhs (new_stmt
, new_temp
);
/* Otherwise CODE is a tree code: build a plain assignment and sanity-check
   the operand count against the code's arity.  */
4485 /* Generic support */
4486 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4487 if (op_type
!= binary_op
)
4489 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4490 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4491 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the new stmt at GSI and record it with the vectorizer.  */
4493 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4499 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4500 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4501 containing scalar operand), and for the rest we get a copy with
4502 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4503 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4504 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): lines are missing from this dump (numbering jumps
   4504 -> 4507, 4525 -> 4529); in particular the update of *OPRND between
   steps is not visible here — confirm against the full source.  */
4507 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4508 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4510 vec_info
*vinfo
= stmt_info
->vinfo
;
/* The very first operand is still the scalar; only then do subsequent
   requests become copies of the previous vector definition.  */
4513 /* Get first vector operand. */
4514 /* All the vector operands except the very first one (that is scalar oprnd)
4516 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4517 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4519 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4521 vec_oprnds
->quick_push (vec_oprnd
);
4523 /* Get second vector operand. */
4524 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4525 vec_oprnds
->quick_push (vec_oprnd
);
/* Recurse: each level of a multi-step conversion consumes two operands,
   so fetch the remaining ones with MULTI_STEP_CVT decremented.  */
4529 /* For conversion in multiple steps, continue to get operands
4532 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4533 multi_step_cvt
- 1);
4537 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4538 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): this dump drops lines (numbering jumps 4538 -> 4542,
   4544 -> 4546, 4599 -> 4603); the MULTI_STEP_CVT and VEC_DSTS parameters
   and several statements are not visible.  Verify against the full source.  */
4542 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4544 stmt_vec_info stmt_info
,
4546 gimple_stmt_iterator
*gsi
,
4547 slp_tree slp_node
, enum tree_code code
,
4548 stmt_vec_info
*prev_stmt_info
)
4551 tree vop0
, vop1
, new_tmp
, vec_dest
;
/* Destinations were pushed deepest-first; pop the one for this level.  */
4553 vec_dest
= vec_dsts
.pop ();
/* Consume the operands pairwise: each demotion packs two source vectors
   into one narrower result vector.  */
4555 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4557 /* Create demotion operation. */
4558 vop0
= (*vec_oprnds
)[i
];
4559 vop1
= (*vec_oprnds
)[i
+ 1];
4560 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4561 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4562 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4563 stmt_vec_info new_stmt_info
4564 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4567 /* Store the resulting vector for next recursive call. */
4568 (*vec_oprnds
)[i
/2] = new_tmp
;
4571 /* This is the last step of the conversion sequence. Store the
4572 vectors in SLP_NODE or in vector info of the scalar statement
4573 (or in STMT_VINFO_RELATED_STMT chain). */
4575 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4578 if (!*prev_stmt_info
)
4579 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4581 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4583 *prev_stmt_info
= new_stmt_info
;
4588 /* For multi-step demotion operations we first generate demotion operations
4589 from the source type to the intermediate types, and then combine the
4590 results (stored in VEC_OPRNDS) in demotion operation to the destination
4594 /* At each level of recursion we have half of the operands we had at the
4596 vec_oprnds
->truncate ((i
+1)/2);
4597 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4598 stmt_info
, vec_dsts
, gsi
,
4599 slp_node
, VEC_PACK_TRUNC_EXPR
,
/* Restore the destination popped above so the caller's VEC_DSTS stack is
   unchanged on return.  */
4603 vec_dsts
.quick_push (vec_dest
);
4607 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4608 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4609 STMT_INFO. For multi-step conversions store the resulting vectors and
4610 call the function recursively. */
/* NOTE(review): lines are missing from this dump (numbering jumps
   4610 -> 4613, 4630 -> 4634, 4654 -> 4657); loop variable declarations and
   some trailing arguments of the calls below are not visible.  */
4613 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4614 vec
<tree
> *vec_oprnds1
,
4615 stmt_vec_info stmt_info
, tree vec_dest
,
4616 gimple_stmt_iterator
*gsi
,
4617 enum tree_code code1
,
4618 enum tree_code code2
, tree decl1
,
4619 tree decl2
, int op_type
)
4622 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4623 gimple
*new_stmt1
, *new_stmt2
;
4624 vec
<tree
> vec_tmp
= vNULL
;
/* Each input vector widens into two output vectors (low/high halves),
   hence twice the capacity.  */
4626 vec_tmp
.create (vec_oprnds0
->length () * 2);
4627 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4629 if (op_type
== binary_op
)
4630 vop1
= (*vec_oprnds1
)[i
];
4634 /* Generate the two halves of promotion operation. */
4635 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4636 op_type
, vec_dest
, gsi
,
4638 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4639 op_type
, vec_dest
, gsi
,
/* vect_gen_widened_results_half emits either a call (target builtin) or
   an assign; fetch the lhs through the matching accessor.  */
4641 if (is_gimple_call (new_stmt1
))
4643 new_tmp1
= gimple_call_lhs (new_stmt1
);
4644 new_tmp2
= gimple_call_lhs (new_stmt2
);
4648 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4649 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4652 /* Store the results for the next step. */
4653 vec_tmp
.quick_push (new_tmp1
);
4654 vec_tmp
.quick_push (new_tmp2
);
/* Hand the widened results back to the caller in place of the inputs.  */
4657 vec_oprnds0
->release ();
4658 *vec_oprnds0
= vec_tmp
;
4662 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4663 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4664 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4665 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this dump has lost many lines of this function (the embedded
   numbering jumps repeatedly, e.g. 4665 -> 4668, 4746 -> 4748, 5015 -> 5019,
   5077 -> 5084); in particular the early `return false` paths, the
   assignments to MODIFIER, and what is presumably a switch on MODIFIER that
   selects the NONE/WIDEN/NARROW transform loops below are not visible.
   Verify any reading of this function against the full tree-vect-stmts.c.  */
4668 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4669 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4670 stmt_vector_for_cost
*cost_vec
)
4674 tree op0
, op1
= NULL_TREE
;
4675 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4676 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4677 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4678 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4679 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4681 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4683 stmt_vec_info prev_stmt_info
;
4684 poly_uint64 nunits_in
;
4685 poly_uint64 nunits_out
;
4686 tree vectype_out
, vectype_in
;
4688 tree lhs_type
, rhs_type
;
/* MODIFIER records whether the conversion narrows, widens, or keeps the
   element count.  */
4689 enum { NARROW
, NONE
, WIDEN
} modifier
;
4690 vec
<tree
> vec_oprnds0
= vNULL
;
4691 vec
<tree
> vec_oprnds1
= vNULL
;
4693 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4694 vec_info
*vinfo
= stmt_info
->vinfo
;
4695 int multi_step_cvt
= 0;
4696 vec
<tree
> interm_types
= vNULL
;
4697 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4699 unsigned short fltsz
;
4701 /* Is STMT a vectorizable conversion? */
4703 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4706 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
/* Only plain assignments with an SSA lhs and a conversion-like rhs code
   are handled.  */
4710 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4714 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4717 code
= gimple_assign_rhs_code (stmt
);
4718 if (!CONVERT_EXPR_CODE_P (code
)
4719 && code
!= FIX_TRUNC_EXPR
4720 && code
!= FLOAT_EXPR
4721 && code
!= WIDEN_MULT_EXPR
4722 && code
!= WIDEN_LSHIFT_EXPR
)
4725 op_type
= TREE_CODE_LENGTH (code
);
4727 /* Check types of lhs and rhs. */
4728 scalar_dest
= gimple_assign_lhs (stmt
);
4729 lhs_type
= TREE_TYPE (scalar_dest
);
4730 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4732 op0
= gimple_assign_rhs1 (stmt
);
4733 rhs_type
= TREE_TYPE (op0
);
/* Apart from float<->int codes, both sides must be integral or both
   scalar float.  */
4735 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4736 && !((INTEGRAL_TYPE_P (lhs_type
)
4737 && INTEGRAL_TYPE_P (rhs_type
))
4738 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4739 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4742 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4743 && ((INTEGRAL_TYPE_P (lhs_type
)
4744 && !type_has_mode_precision_p (lhs_type
))
4745 || (INTEGRAL_TYPE_P (rhs_type
)
4746 && !type_has_mode_precision_p (rhs_type
))))
4748 if (dump_enabled_p ())
4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4750 "type conversion to/from bit-precision unsupported."
4755 /* Check the operands of the operation. */
4756 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4758 if (dump_enabled_p ())
4759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4760 "use not simple.\n");
4763 if (op_type
== binary_op
)
4767 op1
= gimple_assign_rhs2 (stmt
);
4768 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4769 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4771 if (CONSTANT_CLASS_P (op0
))
4772 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4774 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4780 "use not simple.\n");
4785 /* If op0 is an external or constant defs use a vector type of
4786 the same size as the output vector type. */
4788 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4790 gcc_assert (vectype_in
);
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4795 "no vectype for scalar type %T\n", rhs_type
);
4800 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4801 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4805 "can't convert between boolean and non "
4806 "boolean vectors %T\n", rhs_type
);
/* Compare element counts to classify the conversion; the assignments to
   MODIFIER that go with these branches were lost in this dump —
   presumably NONE / NARROW / WIDEN respectively; confirm.  */
4811 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4812 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4813 if (known_eq (nunits_out
, nunits_in
))
4815 else if (multiple_p (nunits_out
, nunits_in
))
4819 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4823 /* Multiple types in SLP are handled by creating the appropriate number of
4824 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4828 else if (modifier
== NARROW
)
4829 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4831 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4833 /* Sanity check: make sure that at least one copy of the vectorized stmt
4834 needs to be generated. */
4835 gcc_assert (ncopies
>= 1);
4836 bool found_mode
4837 bool found_mode
= false;
4838 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4839 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4840 opt_scalar_mode rhs_mode_iter
;
4842 /* Supportable by target? */
4846 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4848 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4853 if (dump_enabled_p ())
4854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4855 "conversion not supported by target.\n");
4859 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4860 vectype_in
, &code1
, &code2
,
4861 &multi_step_cvt
, &interm_types
))
4863 /* Binary widening operation can only be supported directly by the
4865 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
))
4866 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
/* int->float widening may need an intermediate integer widening step:
   search successively wider integer modes for a supported combination.  */
4869 if (code
!= FLOAT_EXPR
4870 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4873 fltsz
= GET_MODE_SIZE (lhs_mode
);
4874 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4876 rhs_mode
= rhs_mode_iter
.require ();
4877 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4881 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4882 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4883 if (cvt_type
== NULL_TREE
)
4886 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4888 if (!supportable_convert_operation (code
, vectype_out
,
4889 cvt_type
, &decl1
, &codecvt1
))
4892 else if (!supportable_widening_operation (code
, stmt_info
,
4893 vectype_out
, cvt_type
,
4894 &codecvt1
, &codecvt2
,
4899 gcc_assert (multi_step_cvt
== 0);
4901 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4902 vectype_in
, &code1
, &code2
,
4903 &multi_step_cvt
, &interm_types
))
4913 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4914 codecvt2
= ERROR_MARK
;
4918 interm_types
.safe_push (cvt_type
);
4919 cvt_type
= NULL_TREE
;
/* NARROW: narrowing is always unary; float->int narrowing may need an
   intermediate FIX_TRUNC to a same-size integer type first.  */
4924 gcc_assert (op_type
== unary_op
);
4925 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4926 &code1
, &multi_step_cvt
,
4930 if (code
!= FIX_TRUNC_EXPR
4931 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4935 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4936 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4937 if (cvt_type
== NULL_TREE
)
4939 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4942 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4943 &code1
, &multi_step_cvt
,
/* Analysis-only path: record the stmt kind and its cost, then return
   without generating code.  */
4952 if (!vec_stmt
) /* transformation not required. */
4954 DUMP_VECT_SCOPE ("vectorizable_conversion");
4955 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4957 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4958 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4961 else if (modifier
== NARROW
)
4963 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4964 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4969 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4970 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4973 interm_types
.release ();
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_NOTE
, vect_location
,
4980 "transform conversion. ncopies = %d.\n", ncopies
)
4980 "transform conversion. ncopies = %d.\n", ncopies
);
4982 if (op_type
== binary_op
)
4984 if (CONSTANT_CLASS_P (op0
))
4985 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4986 else if (CONSTANT_CLASS_P (op1
))
4987 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4990 /* In case of multi-step conversion, we first generate conversion operations
4991 to the intermediate types, and then from that types to the final one.
4992 We create vector destinations for the intermediate type (TYPES) received
4993 from supportable_*_operation, and store them in the correct order
4994 for future use in vect_create_vectorized_*_stmts (). */
4995 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4996 vec_dest
= vect_create_destination_var (scalar_dest
,
4997 (cvt_type
&& modifier
== WIDEN
)
4998 ? cvt_type
: vectype_out
);
4999 vec_dsts
.quick_push (vec_dest
);
5003 for (i
= interm_types
.length () - 1;
5004 interm_types
.iterate (i
, &intermediate_type
); i
--)
5006 vec_dest
= vect_create_destination_var (scalar_dest
,
5008 vec_dsts
.quick_push (vec_dest
);
5013 vec_dest
= vect_create_destination_var (scalar_dest
,
5015 ? vectype_out
: cvt_type
);
/* Pre-size the operand buffers according to the conversion kind.  */
5019 if (modifier
== WIDEN
)
5021 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
5022 if (op_type
== binary_op
)
5023 vec_oprnds1
.create (1);
5025 else if (modifier
== NARROW
)
5026 vec_oprnds0
.create (
5027 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
5029 else if (code
== WIDEN_LSHIFT_EXPR
)
5030 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5033 prev_stmt_info
= NULL
;
/* NOTE(review): the dispatch on MODIFIER (likely `switch (modifier)` with
   `case NONE:` here) was lost in this dump; this first NCOPIES loop is the
   same-size (NONE) transform.  */
5037 for (j
= 0; j
< ncopies
; j
++)
5040 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
5043 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
5045 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5047 stmt_vec_info new_stmt_info
;
5048 /* Arguments are ready, create the new vector stmt. */
5049 if (code1
== CALL_EXPR
)
5051 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5052 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5053 gimple_call_set_lhs (new_stmt
, new_temp
);
5055 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5059 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5061 = gimple_build_assign (vec_dest
, code1
, vop0
);
5062 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5063 gimple_assign_set_lhs (new_stmt
, new_temp
);
5065 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5069 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5072 if (!prev_stmt_info
)
5073 STMT_VINFO_VEC_STMT (stmt_info
)
5074 = *vec_stmt
= new_stmt_info
;
5076 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5077 prev_stmt_info
= new_stmt_info
;
/* WIDEN transform: each copy produces two (or more) widened vectors.  */
5084 /* In case the vectorization factor (VF) is bigger than the number
5085 of elements that we can fit in a vectype (nunits), we have to
5086 generate more than one vector stmt - i.e - we need to "unroll"
5087 the vector stmt by a factor VF/nunits. */
5088 for (j
= 0; j
< ncopies
; j
++)
5095 if (code
== WIDEN_LSHIFT_EXPR
)
5100 /* Store vec_oprnd1 for every vector stmt to be created
5101 for SLP_NODE. We check during the analysis that all
5102 the shift arguments are the same. */
5103 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5104 vec_oprnds1
.quick_push (vec_oprnd1
);
5106 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
5107 &vec_oprnds0
, NULL
, slp_node
);
5110 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5111 &vec_oprnds1
, slp_node
);
5115 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5116 vec_oprnds0
.quick_push (vec_oprnd0
);
5117 if (op_type
== binary_op
)
5119 if (code
== WIDEN_LSHIFT_EXPR
)
5123 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5124 vec_oprnds1
.quick_push (vec_oprnd1
);
5130 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5131 vec_oprnds0
.truncate (0);
5132 vec_oprnds0
.quick_push (vec_oprnd0
);
5133 if (op_type
== binary_op
)
5135 if (code
== WIDEN_LSHIFT_EXPR
)
5138 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5140 vec_oprnds1
.truncate (0);
5141 vec_oprnds1
.quick_push (vec_oprnd1
);
5145 /* Arguments are ready. Create the new vector stmts. */
5146 for (i
= multi_step_cvt
; i
>= 0; i
--)
5148 tree this_dest
= vec_dsts
[i
];
5149 enum tree_code c1
= code1
, c2
= code2
;
5150 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5155 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5156 &vec_oprnds1
, stmt_info
,
5158 c1
, c2
, decl1
, decl2
,
/* If a final CVT_TYPE->VECTYPE_OUT conversion (CODECVT1) is needed,
   emit it on each widened result.  */
5162 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5164 stmt_vec_info new_stmt_info
;
5167 if (codecvt1
== CALL_EXPR
)
5169 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5170 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5171 gimple_call_set_lhs (new_stmt
, new_temp
);
5173 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5178 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5179 new_temp
= make_ssa_name (vec_dest
);
5181 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5183 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5188 new_stmt_info
= vinfo
->lookup_def (vop0
);
5191 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5194 if (!prev_stmt_info
)
5195 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5197 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5198 prev_stmt_info
= new_stmt_info
;
5203 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* NARROW transform: convert first if needed, then pack pairs of vectors
   via vect_create_vectorized_demotion_stmts.  */
5207 /* In case the vectorization factor (VF) is bigger than the number
5208 of elements that we can fit in a vectype (nunits), we have to
5209 generate more than one vector stmt - i.e - we need to "unroll"
5210 the vector stmt by a factor VF/nunits. */
5211 for (j
= 0; j
< ncopies
; j
++)
5215 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5219 vec_oprnds0
.truncate (0);
5220 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5221 vect_pow2 (multi_step_cvt
) - 1);
5224 /* Arguments are ready. Create the new vector stmts. */
5226 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5228 if (codecvt1
== CALL_EXPR
)
5230 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5231 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5232 gimple_call_set_lhs (new_stmt
, new_temp
);
5233 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5237 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5238 new_temp
= make_ssa_name (vec_dest
);
5240 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5241 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5244 vec_oprnds0
[i
] = new_temp
;
5247 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5248 stmt_info
, vec_dsts
, gsi
,
5253 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5257 vec_oprnds0
.release ();
5258 vec_oprnds1
.release ();
5259 interm_types
.release ();
5265 /* Function vectorizable_assignment.
5267 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5268 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5269 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5270 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this dump has dropped lines of this function (the embedded
   numbering jumps, e.g. 5270 -> 5273, 5297 -> 5301, 5412 -> 5419); early
   `return false` paths and some declarations are not visible.  Verify
   against the full tree-vect-stmts.c.  */
5273 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5274 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5275 stmt_vector_for_cost
*cost_vec
)
5280 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5282 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5286 vec
<tree
> vec_oprnds
= vNULL
;
5288 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5289 vec_info
*vinfo
= stmt_info
->vinfo
;
5290 stmt_vec_info prev_stmt_info
= NULL
;
5291 enum tree_code code
;
5294 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5297 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5301 /* Is vectorizable assignment? */
5302 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5306 scalar_dest
= gimple_assign_lhs (stmt
);
5307 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Only plain copies, PAREN_EXPR, and conversion codes qualify.  */
5310 code
= gimple_assign_rhs_code (stmt
);
5311 if (gimple_assign_single_p (stmt
)
5312 || code
== PAREN_EXPR
5313 || CONVERT_EXPR_CODE_P (code
))
5314 op
= gimple_assign_rhs1 (stmt
);
/* For VIEW_CONVERT_EXPR look through the wrapper to the real operand.  */
5318 if (code
== VIEW_CONVERT_EXPR
)
5319 op
= TREE_OPERAND (op
, 0);
5321 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5322 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5324 /* Multiple types in SLP are handled by creating the appropriate number of
5325 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5330 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5332 gcc_assert (ncopies
>= 1);
5334 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5336 if (dump_enabled_p ())
5337 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5338 "use not simple.\n");
5342 /* We can handle NOP_EXPR conversions that do not change the number
5343 of elements or the vector size. */
5344 if ((CONVERT_EXPR_CODE_P (code
)
5345 || code
== VIEW_CONVERT_EXPR
)
5347 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5348 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5349 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5352 /* We do not handle bit-precision changes. */
5353 if ((CONVERT_EXPR_CODE_P (code
)
5354 || code
== VIEW_CONVERT_EXPR
)
5355 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5356 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5357 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5358 /* But a conversion that does not change the bit-pattern is ok. */
5359 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5360 > TYPE_PRECISION (TREE_TYPE (op
)))
5361 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5362 /* Conversion between boolean types of different sizes is
5363 a simple assignment in case their vectypes are same
5365 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5366 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5368 if (dump_enabled_p ())
5369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5370 "type conversion to/from bit-precision "
/* Analysis-only path: record the stmt kind and cost, generate nothing.  */
5375 if (!vec_stmt
) /* transformation not required. */
5377 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5378 DUMP_VECT_SCOPE ("vectorizable_assignment");
5379 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5388 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5391 for (j
= 0; j
< ncopies
; j
++)
5395 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5397 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5399 /* Arguments are ready. create the new vector stmt. */
5400 stmt_vec_info new_stmt_info
= NULL
;
5401 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Conversions are emitted as a VIEW_CONVERT_EXPR on the operand: same
   bits reinterpreted in the destination vector type.  */
5403 if (CONVERT_EXPR_CODE_P (code
)
5404 || code
== VIEW_CONVERT_EXPR
)
5405 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5406 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5407 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5408 gimple_assign_set_lhs (new_stmt
, new_temp
);
5410 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5412 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5419 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5421 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5423 prev_stmt_info
= new_stmt_info
;
5426 vec_oprnds
.release ();
5431 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5432 either as shift by a scalar or by a vector. */
/* NOTE(review): lines are dropped in this dump (numbering jumps
   5432 -> 5435, 5443 -> 5447, 5460 -> end); the return statements and some
   declarations are not visible.  Verify against the full source.  */
5435 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
5438 machine_mode vec_mode
;
5443 vectype
= get_vectype_for_scalar_type (scalar_type
)
5443 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Try the vector-shifted-by-scalar optab first; if the target lacks it,
   fall back to the vector-shifted-by-vector optab.  */
5447 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5449 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5451 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5453 || (optab_handler (optab
, TYPE_MODE (vectype
))
5454 == CODE_FOR_nothing
))
/* Double-check that the chosen optab really has an insn for this mode.  */
5458 vec_mode
= TYPE_MODE (vectype
);
5459 icode
= (int) optab_handler (optab
, vec_mode
);
5460 if (icode
== CODE_FOR_nothing
)
5467 /* Function vectorizable_shift.
5469 Check if STMT_INFO performs a shift operation that can be vectorized.
5470 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5471 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5472 Return true if STMT_INFO is vectorizable in this way. */
5475 vectorizable_shift (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5476 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5477 stmt_vector_for_cost
*cost_vec
)
5481 tree op0
, op1
= NULL
;
5482 tree vec_oprnd1
= NULL_TREE
;
5484 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5485 enum tree_code code
;
5486 machine_mode vec_mode
;
5490 machine_mode optab_op2_mode
;
5491 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5493 stmt_vec_info prev_stmt_info
;
5494 poly_uint64 nunits_in
;
5495 poly_uint64 nunits_out
;
5500 vec
<tree
> vec_oprnds0
= vNULL
;
5501 vec
<tree
> vec_oprnds1
= vNULL
;
5504 bool scalar_shift_arg
= true;
5505 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5506 vec_info
*vinfo
= stmt_info
->vinfo
;
5508 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5511 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5512 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5516 /* Is STMT a vectorizable binary/unary operation? */
5517 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5521 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5524 code
= gimple_assign_rhs_code (stmt
);
5526 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5527 || code
== RROTATE_EXPR
))
5530 scalar_dest
= gimple_assign_lhs (stmt
);
5531 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5532 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5534 if (dump_enabled_p ())
5535 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5536 "bit-precision shifts not supported.\n");
5540 op0
= gimple_assign_rhs1 (stmt
);
5541 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5543 if (dump_enabled_p ())
5544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5545 "use not simple.\n");
5548 /* If op0 is an external or constant def use a vector type with
5549 the same size as the output vector type. */
5551 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5553 gcc_assert (vectype
);
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5558 "no vectype for scalar type\n");
5562 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5563 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5564 if (maybe_ne (nunits_out
, nunits_in
))
5567 op1
= gimple_assign_rhs2 (stmt
);
5568 stmt_vec_info op1_def_stmt_info
;
5569 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5570 &op1_def_stmt_info
))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5574 "use not simple.\n");
5578 /* Multiple types in SLP are handled by creating the appropriate number of
5579 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5584 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5586 gcc_assert (ncopies
>= 1);
5588 /* Determine whether the shift amount is a vector, or scalar. If the
5589 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5591 if ((dt
[1] == vect_internal_def
5592 || dt
[1] == vect_induction_def
5593 || dt
[1] == vect_nested_cycle
)
5595 scalar_shift_arg
= false;
5596 else if (dt
[1] == vect_constant_def
5597 || dt
[1] == vect_external_def
5598 || dt
[1] == vect_internal_def
)
5600 /* In SLP, need to check whether the shift count is the same,
5601 in loops if it is a constant or invariant, it is always
5605 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5606 stmt_vec_info slpstmt_info
;
5608 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5610 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5611 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5612 scalar_shift_arg
= false;
5615 /* For internal SLP defs we have to make sure we see scalar stmts
5616 for all vector elements.
5617 ??? For different vectors we could resort to a different
5618 scalar shift operand but code-generation below simply always
5620 if (dt
[1] == vect_internal_def
5621 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5623 scalar_shift_arg
= false;
5626 /* If the shift amount is computed by a pattern stmt we cannot
5627 use the scalar amount directly thus give up and use a vector
5629 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5630 scalar_shift_arg
= false;
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5636 "operand mode requires invariant argument.\n");
5640 /* Vector shifted by vector. */
5641 if (!scalar_shift_arg
)
5643 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_NOTE
, vect_location
,
5646 "vector/vector shift/rotate found.\n");
5649 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5650 if (op1_vectype
== NULL_TREE
5651 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5655 "unusable type for last operand in"
5656 " vector/vector shift/rotate.\n");
5660 /* See if the machine has a vector shifted by scalar insn and if not
5661 then see if it has a vector shifted by vector insn. */
5664 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5666 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_NOTE
, vect_location
,
5670 "vector/scalar shift/rotate found.\n");
5674 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5676 && (optab_handler (optab
, TYPE_MODE (vectype
))
5677 != CODE_FOR_nothing
))
5679 scalar_shift_arg
= false;
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_NOTE
, vect_location
,
5683 "vector/vector shift/rotate found.\n");
5685 /* Unlike the other binary operators, shifts/rotates have
5686 the rhs being int, instead of the same type as the lhs,
5687 so make sure the scalar is the right type if we are
5688 dealing with vectors of long long/long/short/char. */
5689 if (dt
[1] == vect_constant_def
)
5690 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5691 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5695 && TYPE_MODE (TREE_TYPE (vectype
))
5696 != TYPE_MODE (TREE_TYPE (op1
)))
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5700 "unusable type for last operand in"
5701 " vector/vector shift/rotate.\n");
5704 if (vec_stmt
&& !slp_node
)
5706 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5707 op1
= vect_init_vector (stmt_info
, op1
,
5708 TREE_TYPE (vectype
), NULL
);
5715 /* Supportable by target? */
5718 if (dump_enabled_p ())
5719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5723 vec_mode
= TYPE_MODE (vectype
);
5724 icode
= (int) optab_handler (optab
, vec_mode
);
5725 if (icode
== CODE_FOR_nothing
)
5727 if (dump_enabled_p ())
5728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5729 "op not supported by target.\n");
5730 /* Check only during analysis. */
5731 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5733 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5735 if (dump_enabled_p ())
5736 dump_printf_loc (MSG_NOTE
, vect_location
,
5737 "proceeding using word mode.\n");
5740 /* Worthwhile without SIMD support? Check only during analysis. */
5742 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5743 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5747 "not worthwhile without SIMD support.\n");
5751 if (!vec_stmt
) /* transformation not required. */
5753 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5754 DUMP_VECT_SCOPE ("vectorizable_shift");
5755 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_NOTE
, vect_location
,
5763 "transform binary/unary operation.\n");
5766 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5768 prev_stmt_info
= NULL
;
5769 for (j
= 0; j
< ncopies
; j
++)
5774 if (scalar_shift_arg
)
5776 /* Vector shl and shr insn patterns can be defined with scalar
5777 operand 2 (shift operand). In this case, use constant or loop
5778 invariant op1 directly, without extending it to vector mode
5780 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5781 if (!VECTOR_MODE_P (optab_op2_mode
))
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_NOTE
, vect_location
,
5785 "operand 1 using scalar mode.\n");
5787 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5788 vec_oprnds1
.quick_push (vec_oprnd1
);
5791 /* Store vec_oprnd1 for every vector stmt to be created
5792 for SLP_NODE. We check during the analysis that all
5793 the shift arguments are the same.
5794 TODO: Allow different constants for different vector
5795 stmts generated for an SLP instance. */
5796 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5797 vec_oprnds1
.quick_push (vec_oprnd1
);
5802 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5803 (a special case for certain kind of vector shifts); otherwise,
5804 operand 1 should be of a vector type (the usual case). */
5806 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5809 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5813 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5815 /* Arguments are ready. Create the new vector stmt. */
5816 stmt_vec_info new_stmt_info
= NULL
;
5817 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5819 vop1
= vec_oprnds1
[i
];
5820 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5821 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5822 gimple_assign_set_lhs (new_stmt
, new_temp
);
5824 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5826 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5833 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5835 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5836 prev_stmt_info
= new_stmt_info
;
5839 vec_oprnds0
.release ();
5840 vec_oprnds1
.release ();
5846 /* Function vectorizable_operation.
5848 Check if STMT_INFO performs a binary, unary or ternary operation that can
5850 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5851 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5852 Return true if STMT_INFO is vectorizable in this way. */
5855 vectorizable_operation (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5856 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5857 stmt_vector_for_cost
*cost_vec
)
5861 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5863 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5864 enum tree_code code
, orig_code
;
5865 machine_mode vec_mode
;
5869 bool target_support_p
;
5870 enum vect_def_type dt
[3]
5871 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5873 stmt_vec_info prev_stmt_info
;
5874 poly_uint64 nunits_in
;
5875 poly_uint64 nunits_out
;
5879 vec
<tree
> vec_oprnds0
= vNULL
;
5880 vec
<tree
> vec_oprnds1
= vNULL
;
5881 vec
<tree
> vec_oprnds2
= vNULL
;
5882 tree vop0
, vop1
, vop2
;
5883 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5884 vec_info
*vinfo
= stmt_info
->vinfo
;
5886 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5889 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5893 /* Is STMT a vectorizable binary/unary operation? */
5894 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5898 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5901 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5903 /* For pointer addition and subtraction, we should use the normal
5904 plus and minus for the vector operation. */
5905 if (code
== POINTER_PLUS_EXPR
)
5907 if (code
== POINTER_DIFF_EXPR
)
5910 /* Support only unary or binary operations. */
5911 op_type
= TREE_CODE_LENGTH (code
);
5912 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5914 if (dump_enabled_p ())
5915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5916 "num. args = %d (not unary/binary/ternary op).\n",
5921 scalar_dest
= gimple_assign_lhs (stmt
);
5922 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5924 /* Most operations cannot handle bit-precision types without extra
5926 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5927 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5928 /* Exception are bitwise binary operations. */
5929 && code
!= BIT_IOR_EXPR
5930 && code
!= BIT_XOR_EXPR
5931 && code
!= BIT_AND_EXPR
)
5933 if (dump_enabled_p ())
5934 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5935 "bit-precision arithmetic not supported.\n");
5939 op0
= gimple_assign_rhs1 (stmt
);
5940 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5944 "use not simple.\n");
5947 /* If op0 is an external or constant def use a vector type with
5948 the same size as the output vector type. */
5951 /* For boolean type we cannot determine vectype by
5952 invariant value (don't know whether it is a vector
5953 of booleans or vector of integers). We use output
5954 vectype because operations on boolean don't change
5956 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5958 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5962 "not supported operation on bool value.\n");
5965 vectype
= vectype_out
;
5968 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5971 gcc_assert (vectype
);
5974 if (dump_enabled_p ())
5975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5976 "no vectype for scalar type %T\n",
5982 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5983 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5984 if (maybe_ne (nunits_out
, nunits_in
))
5987 if (op_type
== binary_op
|| op_type
== ternary_op
)
5989 op1
= gimple_assign_rhs2 (stmt
);
5990 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
5992 if (dump_enabled_p ())
5993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5994 "use not simple.\n");
5998 if (op_type
== ternary_op
)
6000 op2
= gimple_assign_rhs3 (stmt
);
6001 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
6003 if (dump_enabled_p ())
6004 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6005 "use not simple.\n");
6010 /* Multiple types in SLP are handled by creating the appropriate number of
6011 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6016 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6018 gcc_assert (ncopies
>= 1);
6020 /* Shifts are handled in vectorizable_shift (). */
6021 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6022 || code
== RROTATE_EXPR
)
6025 /* Supportable by target? */
6027 vec_mode
= TYPE_MODE (vectype
);
6028 if (code
== MULT_HIGHPART_EXPR
)
6029 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6032 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6040 target_support_p
= (optab_handler (optab
, vec_mode
)
6041 != CODE_FOR_nothing
);
6044 if (!target_support_p
)
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6048 "op not supported by target.\n");
6049 /* Check only during analysis. */
6050 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6051 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_NOTE
, vect_location
,
6055 "proceeding using word mode.\n");
6058 /* Worthwhile without SIMD support? Check only during analysis. */
6059 if (!VECTOR_MODE_P (vec_mode
)
6061 && !vect_worthwhile_without_simd_p (vinfo
, code
))
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6065 "not worthwhile without SIMD support.\n");
6069 if (!vec_stmt
) /* transformation not required. */
6071 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6072 DUMP_VECT_SCOPE ("vectorizable_operation");
6073 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_NOTE
, vect_location
,
6081 "transform binary/unary operation.\n");
6083 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6084 vectors with unsigned elements, but the result is signed. So, we
6085 need to compute the MINUS_EXPR into vectype temporary and
6086 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6087 tree vec_cvt_dest
= NULL_TREE
;
6088 if (orig_code
== POINTER_DIFF_EXPR
)
6090 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6091 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6095 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6097 /* In case the vectorization factor (VF) is bigger than the number
6098 of elements that we can fit in a vectype (nunits), we have to generate
6099 more than one vector stmt - i.e - we need to "unroll" the
6100 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6101 from one copy of the vector stmt to the next, in the field
6102 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6103 stages to find the correct vector defs to be used when vectorizing
6104 stmts that use the defs of the current stmt. The example below
6105 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6106 we need to create 4 vectorized stmts):
6108 before vectorization:
6109 RELATED_STMT VEC_STMT
6113 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6115 RELATED_STMT VEC_STMT
6116 VS1_0: vx0 = memref0 VS1_1 -
6117 VS1_1: vx1 = memref1 VS1_2 -
6118 VS1_2: vx2 = memref2 VS1_3 -
6119 VS1_3: vx3 = memref3 - -
6120 S1: x = load - VS1_0
6123 step2: vectorize stmt S2 (done here):
6124 To vectorize stmt S2 we first need to find the relevant vector
6125 def for the first operand 'x'. This is, as usual, obtained from
6126 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6127 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6128 relevant vector def 'vx0'. Having found 'vx0' we can generate
6129 the vector stmt VS2_0, and as usual, record it in the
6130 STMT_VINFO_VEC_STMT of stmt S2.
6131 When creating the second copy (VS2_1), we obtain the relevant vector
6132 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6133 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6134 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6135 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6136 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6137 chain of stmts and pointers:
6138 RELATED_STMT VEC_STMT
6139 VS1_0: vx0 = memref0 VS1_1 -
6140 VS1_1: vx1 = memref1 VS1_2 -
6141 VS1_2: vx2 = memref2 VS1_3 -
6142 VS1_3: vx3 = memref3 - -
6143 S1: x = load - VS1_0
6144 VS2_0: vz0 = vx0 + v1 VS2_1 -
6145 VS2_1: vz1 = vx1 + v1 VS2_2 -
6146 VS2_2: vz2 = vx2 + v1 VS2_3 -
6147 VS2_3: vz3 = vx3 + v1 - -
6148 S2: z = x + 1 - VS2_0 */
6150 prev_stmt_info
= NULL
;
6151 for (j
= 0; j
< ncopies
; j
++)
6156 if (op_type
== binary_op
)
6157 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
6159 else if (op_type
== ternary_op
)
6163 auto_vec
<tree
> ops(3);
6164 ops
.quick_push (op0
);
6165 ops
.quick_push (op1
);
6166 ops
.quick_push (op2
);
6167 auto_vec
<vec
<tree
> > vec_defs(3);
6168 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
6169 vec_oprnds0
= vec_defs
[0];
6170 vec_oprnds1
= vec_defs
[1];
6171 vec_oprnds2
= vec_defs
[2];
6175 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
6176 &vec_oprnds1
, NULL
);
6177 vect_get_vec_defs (op2
, NULL_TREE
, stmt_info
, &vec_oprnds2
,
6182 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
6187 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6188 if (op_type
== ternary_op
)
6190 tree vec_oprnd
= vec_oprnds2
.pop ();
6191 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6196 /* Arguments are ready. Create the new vector stmt. */
6197 stmt_vec_info new_stmt_info
= NULL
;
6198 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6200 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6201 ? vec_oprnds1
[i
] : NULL_TREE
);
6202 vop2
= ((op_type
== ternary_op
)
6203 ? vec_oprnds2
[i
] : NULL_TREE
);
6204 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6206 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6207 gimple_assign_set_lhs (new_stmt
, new_temp
);
6209 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6212 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6214 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6216 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6217 gimple_assign_set_lhs (new_stmt
, new_temp
);
6219 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6222 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6229 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6231 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6232 prev_stmt_info
= new_stmt_info
;
6235 vec_oprnds0
.release ();
6236 vec_oprnds1
.release ();
6237 vec_oprnds2
.release ();
6242 /* A helper function to ensure data reference DR_INFO's base alignment. */
6245 ensure_base_align (dr_vec_info
*dr_info
)
6247 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6250 if (dr_info
->base_misaligned
)
6252 tree base_decl
= dr_info
->base_decl
;
6254 // We should only be able to increase the alignment of a base object if
6255 // we know what its new alignment should be at compile time.
6256 unsigned HOST_WIDE_INT align_base_to
=
6257 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6259 if (decl_in_symtab_p (base_decl
))
6260 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6263 SET_DECL_ALIGN (base_decl
, align_base_to
);
6264 DECL_USER_ALIGN (base_decl
) = 1;
6266 dr_info
->base_misaligned
= false;
6271 /* Function get_group_alias_ptr_type.
6273 Return the alias type for the group starting at FIRST_STMT_INFO. */
6276 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6278 struct data_reference
*first_dr
, *next_dr
;
6280 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6281 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6282 while (next_stmt_info
)
6284 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6285 if (get_alias_set (DR_REF (first_dr
))
6286 != get_alias_set (DR_REF (next_dr
)))
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_NOTE
, vect_location
,
6290 "conflicting alias set types.\n");
6291 return ptr_type_node
;
6293 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6295 return reference_alias_ptr_type (DR_REF (first_dr
));
6299 /* Function vectorizable_store.
6301 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6302 that can be vectorized.
6303 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6304 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6305 Return true if STMT_INFO is vectorizable in this way. */
6308 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6309 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
6310 stmt_vector_for_cost
*cost_vec
)
6314 tree vec_oprnd
= NULL_TREE
;
6316 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6317 struct loop
*loop
= NULL
;
6318 machine_mode vec_mode
;
6320 enum dr_alignment_support alignment_support_scheme
;
6321 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6322 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6323 stmt_vec_info prev_stmt_info
= NULL
;
6324 tree dataref_ptr
= NULL_TREE
;
6325 tree dataref_offset
= NULL_TREE
;
6326 gimple
*ptr_incr
= NULL
;
6329 stmt_vec_info first_stmt_info
;
6331 unsigned int group_size
, i
;
6332 vec
<tree
> oprnds
= vNULL
;
6333 vec
<tree
> result_chain
= vNULL
;
6334 tree offset
= NULL_TREE
;
6335 vec
<tree
> vec_oprnds
= vNULL
;
6336 bool slp
= (slp_node
!= NULL
);
6337 unsigned int vec_num
;
6338 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6339 vec_info
*vinfo
= stmt_info
->vinfo
;
6341 gather_scatter_info gs_info
;
6343 vec_load_store_type vls_type
;
6346 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6349 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6353 /* Is vectorizable store? */
6355 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6356 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6358 tree scalar_dest
= gimple_assign_lhs (assign
);
6359 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6360 && is_pattern_stmt_p (stmt_info
))
6361 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6362 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6363 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6364 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6365 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6366 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6367 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6368 && TREE_CODE (scalar_dest
) != MEM_REF
)
6373 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
6374 if (!call
|| !gimple_call_internal_p (call
))
6377 internal_fn ifn
= gimple_call_internal_fn (call
);
6378 if (!internal_store_fn_p (ifn
))
6381 if (slp_node
!= NULL
)
6383 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6385 "SLP of masked stores not supported.\n");
6389 int mask_index
= internal_fn_mask_index (ifn
);
6390 if (mask_index
>= 0)
6392 mask
= gimple_call_arg (call
, mask_index
);
6393 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
6399 op
= vect_get_store_rhs (stmt_info
);
6401 /* Cannot have hybrid store SLP -- that would mean storing to the
6402 same location twice. */
6403 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6405 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6406 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6410 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6411 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6416 /* Multiple types in SLP are handled by creating the appropriate number of
6417 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6422 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6424 gcc_assert (ncopies
>= 1);
6426 /* FORNOW. This restriction should be relaxed. */
6427 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
6429 if (dump_enabled_p ())
6430 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6431 "multiple types in nested loop.\n");
6435 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6438 elem_type
= TREE_TYPE (vectype
);
6439 vec_mode
= TYPE_MODE (vectype
);
6441 if (!STMT_VINFO_DATA_REF (stmt_info
))
6444 vect_memory_access_type memory_access_type
;
6445 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
6446 &memory_access_type
, &gs_info
))
6451 if (memory_access_type
== VMAT_CONTIGUOUS
)
6453 if (!VECTOR_MODE_P (vec_mode
)
6454 || !can_vec_mask_load_store_p (vec_mode
,
6455 TYPE_MODE (mask_vectype
), false))
6458 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6459 && (memory_access_type
!= VMAT_GATHER_SCATTER
6460 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
6462 if (dump_enabled_p ())
6463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6464 "unsupported access type for masked store.\n");
6470 /* FORNOW. In some cases can vectorize even if data-type not supported
6471 (e.g. - array initialization with 0). */
6472 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6476 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
6477 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6478 && memory_access_type
!= VMAT_GATHER_SCATTER
6479 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6482 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6483 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6484 group_size
= DR_GROUP_SIZE (first_stmt_info
);
6488 first_stmt_info
= stmt_info
;
6489 first_dr_info
= dr_info
;
6490 group_size
= vec_num
= 1;
6493 if (!vec_stmt
) /* transformation not required. */
6495 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6498 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6499 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6500 memory_access_type
, &gs_info
);
6502 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6503 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
6504 vls_type
, slp_node
, cost_vec
);
6507 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6511 ensure_base_align (dr_info
);
6513 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6515 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6516 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6517 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6518 tree ptr
, var
, scale
, vec_mask
;
6519 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
6520 tree mask_halfvectype
= mask_vectype
;
6521 edge pe
= loop_preheader_edge (loop
);
6524 enum { NARROW
, NONE
, WIDEN
} modifier
;
6525 poly_uint64 scatter_off_nunits
6526 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6528 if (known_eq (nunits
, scatter_off_nunits
))
6530 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6534 /* Currently gathers and scatters are only supported for
6535 fixed-length vectors. */
6536 unsigned int count
= scatter_off_nunits
.to_constant ();
6537 vec_perm_builder
sel (count
, count
, 1);
6538 for (i
= 0; i
< (unsigned int) count
; ++i
)
6539 sel
.quick_push (i
| (count
/ 2));
6541 vec_perm_indices
indices (sel
, 1, count
);
6542 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6544 gcc_assert (perm_mask
!= NULL_TREE
);
6546 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6550 /* Currently gathers and scatters are only supported for
6551 fixed-length vectors. */
6552 unsigned int count
= nunits
.to_constant ();
6553 vec_perm_builder
sel (count
, count
, 1);
6554 for (i
= 0; i
< (unsigned int) count
; ++i
)
6555 sel
.quick_push (i
| (count
/ 2));
6557 vec_perm_indices
indices (sel
, 2, count
);
6558 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6559 gcc_assert (perm_mask
!= NULL_TREE
);
6564 = build_same_sized_truth_vector_type (gs_info
.offset_vectype
);
6569 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6570 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6571 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6572 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6573 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6574 scaletype
= TREE_VALUE (arglist
);
6576 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6577 && TREE_CODE (rettype
) == VOID_TYPE
);
6579 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6580 if (!is_gimple_min_invariant (ptr
))
6582 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6583 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6584 gcc_assert (!new_bb
);
6587 if (mask
== NULL_TREE
)
6589 mask_arg
= build_int_cst (masktype
, -1);
6590 mask_arg
= vect_init_vector (stmt_info
, mask_arg
, masktype
, NULL
);
6593 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6595 prev_stmt_info
= NULL
;
6596 for (j
= 0; j
< ncopies
; ++j
)
6600 src
= vec_oprnd1
= vect_get_vec_def_for_operand (op
, stmt_info
);
6601 op
= vec_oprnd0
= vect_get_vec_def_for_operand (gs_info
.offset
,
6604 mask_op
= vec_mask
= vect_get_vec_def_for_operand (mask
,
6607 else if (modifier
!= NONE
&& (j
& 1))
6609 if (modifier
== WIDEN
)
6612 = vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
6614 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6618 = vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
6621 else if (modifier
== NARROW
)
6623 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6625 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
6633 src
= vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
6635 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
,
6638 mask_op
= vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
,
6642 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6644 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6645 TYPE_VECTOR_SUBPARTS (srctype
)));
6646 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6647 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6649 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6650 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6654 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6656 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6657 TYPE_VECTOR_SUBPARTS (idxtype
)));
6658 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6659 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6661 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6662 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6670 if (modifier
== NARROW
)
6672 var
= vect_get_new_ssa_name (mask_halfvectype
,
6675 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
6676 : VEC_UNPACK_LO_EXPR
,
6678 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6681 tree optype
= TREE_TYPE (mask_arg
);
6682 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
6685 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
6686 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
6687 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
6689 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
6690 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6692 if (!useless_type_conversion_p (masktype
, utype
))
6694 gcc_assert (TYPE_PRECISION (utype
)
6695 <= TYPE_PRECISION (masktype
));
6696 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
6697 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
6698 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6704 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
6705 stmt_vec_info new_stmt_info
6706 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6708 if (prev_stmt_info
== NULL
)
6709 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6711 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6712 prev_stmt_info
= new_stmt_info
;
6717 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6718 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
6723 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
6725 /* We vectorize all the stmts of the interleaving group when we
6726 reach the last stmt in the group. */
6727 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
6728 < DR_GROUP_SIZE (first_stmt_info
)
6737 grouped_store
= false;
6738 /* VEC_NUM is the number of vect stmts to be created for this
6740 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6741 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6742 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
6743 == first_stmt_info
);
6744 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6745 op
= vect_get_store_rhs (first_stmt_info
);
6748 /* VEC_NUM is the number of vect stmts to be created for this
6750 vec_num
= group_size
;
6752 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
6755 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_NOTE
, vect_location
,
6759 "transform store. ncopies = %d\n", ncopies
);
6761 if (memory_access_type
== VMAT_ELEMENTWISE
6762 || memory_access_type
== VMAT_STRIDED_SLP
)
6764 gimple_stmt_iterator incr_gsi
;
6770 tree stride_base
, stride_step
, alias_off
;
6773 /* Checked by get_load_store_type. */
6774 unsigned int const_nunits
= nunits
.to_constant ();
6776 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6777 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
6780 = fold_build_pointer_plus
6781 (DR_BASE_ADDRESS (first_dr_info
->dr
),
6782 size_binop (PLUS_EXPR
,
6783 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
6784 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
6785 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
6787 /* For a store with loop-invariant (but other than power-of-2)
6788 stride (i.e. not a grouped access) like so:
6790 for (i = 0; i < n; i += stride)
6793 we generate a new induction variable and new stores from
6794 the components of the (vectorized) rhs:
6796 for (j = 0; ; j += VF*stride)
6801 array[j + stride] = tmp2;
6805 unsigned nstores
= const_nunits
;
6807 tree ltype
= elem_type
;
6808 tree lvectype
= vectype
;
6811 if (group_size
< const_nunits
6812 && const_nunits
% group_size
== 0)
6814 nstores
= const_nunits
/ group_size
;
6816 ltype
= build_vector_type (elem_type
, group_size
);
6819 /* First check if vec_extract optab doesn't support extraction
6820 of vector elts directly. */
6821 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6823 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6824 || !VECTOR_MODE_P (vmode
)
6825 || !targetm
.vector_mode_supported_p (vmode
)
6826 || (convert_optab_handler (vec_extract_optab
,
6827 TYPE_MODE (vectype
), vmode
)
6828 == CODE_FOR_nothing
))
6830 /* Try to avoid emitting an extract of vector elements
6831 by performing the extracts using an integer type of the
6832 same size, extracting from a vector of those and then
6833 re-interpreting it as the original vector type if
6836 = group_size
* GET_MODE_BITSIZE (elmode
);
6837 unsigned int lnunits
= const_nunits
/ group_size
;
6838 /* If we can't construct such a vector fall back to
6839 element extracts from the original vector type and
6840 element size stores. */
6841 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
6842 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6843 && VECTOR_MODE_P (vmode
)
6844 && targetm
.vector_mode_supported_p (vmode
)
6845 && (convert_optab_handler (vec_extract_optab
,
6847 != CODE_FOR_nothing
))
6851 ltype
= build_nonstandard_integer_type (lsize
, 1);
6852 lvectype
= build_vector_type (ltype
, nstores
);
6854 /* Else fall back to vector extraction anyway.
6855 Fewer stores are more important than avoiding spilling
6856 of the vector we extract from. Compared to the
6857 construction case in vectorizable_load no store-forwarding
6858 issue exists here for reasonable archs. */
6861 else if (group_size
>= const_nunits
6862 && group_size
% const_nunits
== 0)
6865 lnel
= const_nunits
;
6869 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6870 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6873 ivstep
= stride_step
;
6874 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6875 build_int_cst (TREE_TYPE (ivstep
), vf
));
6877 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6879 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6880 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6881 create_iv (stride_base
, ivstep
, NULL
,
6882 loop
, &incr_gsi
, insert_after
,
6884 incr
= gsi_stmt (incr_gsi
);
6885 loop_vinfo
->add_stmt (incr
);
6887 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6889 prev_stmt_info
= NULL
;
6890 alias_off
= build_int_cst (ref_type
, 0);
6891 stmt_vec_info next_stmt_info
= first_stmt_info
;
6892 for (g
= 0; g
< group_size
; g
++)
6894 running_off
= offvar
;
6897 tree size
= TYPE_SIZE_UNIT (ltype
);
6898 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6900 tree newoff
= copy_ssa_name (running_off
, NULL
);
6901 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6903 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6904 running_off
= newoff
;
6906 unsigned int group_el
= 0;
6907 unsigned HOST_WIDE_INT
6908 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6909 for (j
= 0; j
< ncopies
; j
++)
6911 /* We've set op and dt above, from vect_get_store_rhs,
6912 and first_stmt_info == stmt_info. */
6917 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
6918 &vec_oprnds
, NULL
, slp_node
);
6919 vec_oprnd
= vec_oprnds
[0];
6923 op
= vect_get_store_rhs (next_stmt_info
);
6924 vec_oprnd
= vect_get_vec_def_for_operand
6925 (op
, next_stmt_info
);
6931 vec_oprnd
= vec_oprnds
[j
];
6933 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
6936 /* Pun the vector to extract from if necessary. */
6937 if (lvectype
!= vectype
)
6939 tree tem
= make_ssa_name (lvectype
);
6941 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6942 lvectype
, vec_oprnd
));
6943 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
6946 for (i
= 0; i
< nstores
; i
++)
6948 tree newref
, newoff
;
6949 gimple
*incr
, *assign
;
6950 tree size
= TYPE_SIZE (ltype
);
6951 /* Extract the i'th component. */
6952 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6953 bitsize_int (i
), size
);
6954 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6957 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6961 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6963 newref
= build2 (MEM_REF
, ltype
,
6964 running_off
, this_off
);
6965 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
6967 /* And store it to *running_off. */
6968 assign
= gimple_build_assign (newref
, elem
);
6969 stmt_vec_info assign_info
6970 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
6974 || group_el
== group_size
)
6976 newoff
= copy_ssa_name (running_off
, NULL
);
6977 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6978 running_off
, stride_step
);
6979 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6981 running_off
= newoff
;
6984 if (g
== group_size
- 1
6987 if (j
== 0 && i
== 0)
6988 STMT_VINFO_VEC_STMT (stmt_info
)
6989 = *vec_stmt
= assign_info
;
6991 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
6992 prev_stmt_info
= assign_info
;
6996 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7001 vec_oprnds
.release ();
7005 auto_vec
<tree
> dr_chain (group_size
);
7006 oprnds
.create (group_size
);
7008 alignment_support_scheme
7009 = vect_supportable_dr_alignment (first_dr_info
, false);
7010 gcc_assert (alignment_support_scheme
);
7011 vec_loop_masks
*loop_masks
7012 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7013 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7015 /* Targets with store-lane instructions must not require explicit
7016 realignment. vect_supportable_dr_alignment always returns either
7017 dr_aligned or dr_unaligned_supported for masked operations. */
7018 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7021 || alignment_support_scheme
== dr_aligned
7022 || alignment_support_scheme
== dr_unaligned_supported
);
7024 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7025 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7026 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7029 tree vec_offset
= NULL_TREE
;
7030 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7032 aggr_type
= NULL_TREE
;
7035 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7037 aggr_type
= elem_type
;
7038 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7039 &bump
, &vec_offset
);
7043 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7044 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7046 aggr_type
= vectype
;
7047 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
7048 memory_access_type
);
7052 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
7054 /* In case the vectorization factor (VF) is bigger than the number
7055 of elements that we can fit in a vectype (nunits), we have to generate
7056 more than one vector stmt - i.e - we need to "unroll" the
7057 vector stmt by a factor VF/nunits. For more details see documentation in
7058 vect_get_vec_def_for_copy_stmt. */
7060 /* In case of interleaving (non-unit grouped access):
7067 We create vectorized stores starting from base address (the access of the
7068 first stmt in the chain (S2 in the above example), when the last store stmt
7069 of the chain (S4) is reached:
7072 VS2: &base + vec_size*1 = vx0
7073 VS3: &base + vec_size*2 = vx1
7074 VS4: &base + vec_size*3 = vx3
7076 Then permutation statements are generated:
7078 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7079 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7082 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7083 (the order of the data-refs in the output of vect_permute_store_chain
7084 corresponds to the order of scalar stmts in the interleaving chain - see
7085 the documentation of vect_permute_store_chain()).
7087 In case of both multiple types and interleaving, above vector stores and
7088 permutation stmts are created for every copy. The result vector stmts are
7089 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7090 STMT_VINFO_RELATED_STMT for the next copies.
7093 prev_stmt_info
= NULL
;
7094 tree vec_mask
= NULL_TREE
;
7095 for (j
= 0; j
< ncopies
; j
++)
7097 stmt_vec_info new_stmt_info
;
7102 /* Get vectorized arguments for SLP_NODE. */
7103 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
7106 vec_oprnd
= vec_oprnds
[0];
7110 /* For interleaved stores we collect vectorized defs for all the
7111 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7112 used as an input to vect_permute_store_chain(), and OPRNDS as
7113 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7115 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7116 OPRNDS are of size 1. */
7117 stmt_vec_info next_stmt_info
= first_stmt_info
;
7118 for (i
= 0; i
< group_size
; i
++)
7120 /* Since gaps are not supported for interleaved stores,
7121 DR_GROUP_SIZE is the exact number of stmts in the chain.
7122 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7123 that there is no interleaving, DR_GROUP_SIZE is 1,
7124 and only one iteration of the loop will be executed. */
7125 op
= vect_get_store_rhs (next_stmt_info
);
7126 vec_oprnd
= vect_get_vec_def_for_operand
7127 (op
, next_stmt_info
);
7128 dr_chain
.quick_push (vec_oprnd
);
7129 oprnds
.quick_push (vec_oprnd
);
7130 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7133 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
7137 /* We should have catched mismatched types earlier. */
7138 gcc_assert (useless_type_conversion_p (vectype
,
7139 TREE_TYPE (vec_oprnd
)));
7140 bool simd_lane_access_p
7141 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7142 if (simd_lane_access_p
7144 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
7145 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
7146 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
7147 && integer_zerop (DR_INIT (first_dr_info
->dr
))
7148 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7149 get_alias_set (TREE_TYPE (ref_type
))))
7151 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
7152 dataref_offset
= build_int_cst (ref_type
, 0);
7154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7155 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
7156 &dataref_ptr
, &vec_offset
);
7159 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
7160 simd_lane_access_p
? loop
: NULL
,
7161 offset
, &dummy
, gsi
, &ptr_incr
,
7162 simd_lane_access_p
, NULL_TREE
, bump
);
7166 /* For interleaved stores we created vectorized defs for all the
7167 defs stored in OPRNDS in the previous iteration (previous copy).
7168 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7169 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7171 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7172 OPRNDS are of size 1. */
7173 for (i
= 0; i
< group_size
; i
++)
7176 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
7177 dr_chain
[i
] = vec_oprnd
;
7178 oprnds
[i
] = vec_oprnd
;
7181 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
7184 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7185 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7186 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
7188 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7192 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7196 /* Get an array into which we can store the individual vectors. */
7197 vec_array
= create_vector_array (vectype
, vec_num
);
7199 /* Invalidate the current contents of VEC_ARRAY. This should
7200 become an RTL clobber too, which prevents the vector registers
7201 from being upward-exposed. */
7202 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7204 /* Store the individual vectors into the array. */
7205 for (i
= 0; i
< vec_num
; i
++)
7207 vec_oprnd
= dr_chain
[i
];
7208 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
7211 tree final_mask
= NULL
;
7213 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7216 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7223 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7225 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7226 tree alias_ptr
= build_int_cst (ref_type
, align
);
7227 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7228 dataref_ptr
, alias_ptr
,
7229 final_mask
, vec_array
);
7234 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7235 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7236 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7238 gimple_call_set_lhs (call
, data_ref
);
7240 gimple_call_set_nothrow (call
, true);
7241 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7243 /* Record that VEC_ARRAY is now dead. */
7244 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7248 new_stmt_info
= NULL
;
7252 result_chain
.create (group_size
);
7254 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
7258 stmt_vec_info next_stmt_info
= first_stmt_info
;
7259 for (i
= 0; i
< vec_num
; i
++)
7262 unsigned HOST_WIDE_INT align
;
7264 tree final_mask
= NULL_TREE
;
7266 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7268 vectype
, vec_num
* j
+ i
);
7270 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7273 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7275 tree scale
= size_int (gs_info
.scale
);
7278 call
= gimple_build_call_internal
7279 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7280 scale
, vec_oprnd
, final_mask
);
7282 call
= gimple_build_call_internal
7283 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7285 gimple_call_set_nothrow (call
, true);
7287 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7292 /* Bump the vector pointer. */
7293 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7297 vec_oprnd
= vec_oprnds
[i
];
7298 else if (grouped_store
)
7299 /* For grouped stores vectorized defs are interleaved in
7300 vect_permute_store_chain(). */
7301 vec_oprnd
= result_chain
[i
];
7303 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
7304 if (aligned_access_p (first_dr_info
))
7306 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7308 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
7312 misalign
= DR_MISALIGNMENT (first_dr_info
);
7313 if (dataref_offset
== NULL_TREE
7314 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7315 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7318 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7320 tree perm_mask
= perm_mask_for_reverse (vectype
);
7321 tree perm_dest
= vect_create_destination_var
7322 (vect_get_store_rhs (stmt_info
), vectype
);
7323 tree new_temp
= make_ssa_name (perm_dest
);
7325 /* Generate the permute statement. */
7327 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7328 vec_oprnd
, perm_mask
);
7329 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7331 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7332 vec_oprnd
= new_temp
;
7335 /* Arguments are ready. Create the new vector stmt. */
7338 align
= least_bit_hwi (misalign
| align
);
7339 tree ptr
= build_int_cst (ref_type
, align
);
7341 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7343 final_mask
, vec_oprnd
);
7344 gimple_call_set_nothrow (call
, true);
7346 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7350 data_ref
= fold_build2 (MEM_REF
, vectype
,
7354 : build_int_cst (ref_type
, 0));
7355 if (aligned_access_p (first_dr_info
))
7357 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7358 TREE_TYPE (data_ref
)
7359 = build_aligned_type (TREE_TYPE (data_ref
),
7360 align
* BITS_PER_UNIT
);
7362 TREE_TYPE (data_ref
)
7363 = build_aligned_type (TREE_TYPE (data_ref
),
7364 TYPE_ALIGN (elem_type
));
7365 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
7367 = gimple_build_assign (data_ref
, vec_oprnd
);
7369 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7375 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7376 if (!next_stmt_info
)
7383 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7386 prev_stmt_info
= new_stmt_info
;
7391 result_chain
.release ();
7392 vec_oprnds
.release ();
7397 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7398 VECTOR_CST mask. No checks are made that the target platform supports the
7399 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7400 vect_gen_perm_mask_checked. */
7403 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
7407 poly_uint64 nunits
= sel
.length ();
7408 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
7410 mask_type
= build_vector_type (ssizetype
, nunits
);
7411 return vec_perm_indices_to_tree (mask_type
, sel
);
7414 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7415 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7418 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
7420 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
7421 return vect_gen_perm_mask_any (vectype
, sel
);
7424 /* Given a vector variable X and Y, that was generated for the scalar
7425 STMT_INFO, generate instructions to permute the vector elements of X and Y
7426 using permutation mask MASK_VEC, insert them at *GSI and return the
7427 permuted vector variable. */
7430 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
7431 gimple_stmt_iterator
*gsi
)
7433 tree vectype
= TREE_TYPE (x
);
7434 tree perm_dest
, data_ref
;
7437 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
7438 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
7439 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7441 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
7442 data_ref
= make_ssa_name (perm_dest
);
7444 /* Generate the permute statement. */
7445 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
7446 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7451 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7452 inserting them on the loops preheader edge. Returns true if we
7453 were successful in doing so (and thus STMT_INFO can be moved then),
7454 otherwise returns false. */
7457 hoist_defs_of_uses (stmt_vec_info stmt_info
, struct loop
*loop
)
7463 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7465 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7466 if (!gimple_nop_p (def_stmt
)
7467 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7469 /* Make sure we don't need to recurse. While we could do
7470 so in simple cases when there are more complex use webs
7471 we don't have an easy way to preserve stmt order to fulfil
7472 dependencies within them. */
7475 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
7477 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
7479 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
7480 if (!gimple_nop_p (def_stmt2
)
7481 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
7491 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7493 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7494 if (!gimple_nop_p (def_stmt
)
7495 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7497 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7498 gsi_remove (&gsi
, false);
7499 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
7506 /* vectorizable_load.
7508 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7509 that can be vectorized.
7510 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7511 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7512 Return true if STMT_INFO is vectorizable in this way. */
7515 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7516 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7517 slp_instance slp_node_instance
,
7518 stmt_vector_for_cost
*cost_vec
)
7521 tree vec_dest
= NULL
;
7522 tree data_ref
= NULL
;
7523 stmt_vec_info prev_stmt_info
;
7524 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7525 struct loop
*loop
= NULL
;
7526 struct loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
7527 bool nested_in_vect_loop
= false;
7532 enum dr_alignment_support alignment_support_scheme
;
7533 tree dataref_ptr
= NULL_TREE
;
7534 tree dataref_offset
= NULL_TREE
;
7535 gimple
*ptr_incr
= NULL
;
7538 unsigned int group_size
;
7539 poly_uint64 group_gap_adj
;
7540 tree msq
= NULL_TREE
, lsq
;
7541 tree offset
= NULL_TREE
;
7542 tree byte_offset
= NULL_TREE
;
7543 tree realignment_token
= NULL_TREE
;
7545 vec
<tree
> dr_chain
= vNULL
;
7546 bool grouped_load
= false;
7547 stmt_vec_info first_stmt_info
;
7548 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
7549 bool compute_in_loop
= false;
7550 struct loop
*at_loop
;
7552 bool slp
= (slp_node
!= NULL
);
7553 bool slp_perm
= false;
7554 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7557 gather_scatter_info gs_info
;
7558 vec_info
*vinfo
= stmt_info
->vinfo
;
7560 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7562 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7565 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7569 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7570 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7572 scalar_dest
= gimple_assign_lhs (assign
);
7573 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7576 tree_code code
= gimple_assign_rhs_code (assign
);
7577 if (code
!= ARRAY_REF
7578 && code
!= BIT_FIELD_REF
7579 && code
!= INDIRECT_REF
7580 && code
!= COMPONENT_REF
7581 && code
!= IMAGPART_EXPR
7582 && code
!= REALPART_EXPR
7584 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7589 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7590 if (!call
|| !gimple_call_internal_p (call
))
7593 internal_fn ifn
= gimple_call_internal_fn (call
);
7594 if (!internal_load_fn_p (ifn
))
7597 scalar_dest
= gimple_call_lhs (call
);
7601 if (slp_node
!= NULL
)
7603 if (dump_enabled_p ())
7604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7605 "SLP of masked loads not supported.\n");
7609 int mask_index
= internal_fn_mask_index (ifn
);
7610 if (mask_index
>= 0)
7612 mask
= gimple_call_arg (call
, mask_index
);
7613 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7619 if (!STMT_VINFO_DATA_REF (stmt_info
))
7622 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7623 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7627 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7628 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
7629 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7634 /* Multiple types in SLP are handled by creating the appropriate number of
7635 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7640 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7642 gcc_assert (ncopies
>= 1);
7644 /* FORNOW. This restriction should be relaxed. */
7645 if (nested_in_vect_loop
&& ncopies
> 1)
7647 if (dump_enabled_p ())
7648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7649 "multiple types in nested loop.\n");
7653 /* Invalidate assumptions made by dependence analysis when vectorization
7654 on the unrolled body effectively re-orders stmts. */
7656 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7657 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7658 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7660 if (dump_enabled_p ())
7661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7662 "cannot perform implicit CSE when unrolling "
7663 "with negative dependence distance\n");
7667 elem_type
= TREE_TYPE (vectype
);
7668 mode
= TYPE_MODE (vectype
);
7670 /* FORNOW. In some cases can vectorize even if data-type not supported
7671 (e.g. - data copies). */
7672 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7674 if (dump_enabled_p ())
7675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7676 "Aligned load, but unsupported type.\n");
7680 /* Check if the load is a part of an interleaving chain. */
7681 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7683 grouped_load
= true;
7685 gcc_assert (!nested_in_vect_loop
);
7686 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7688 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7689 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7691 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7694 /* Invalidate assumptions made by dependence analysis when vectorization
7695 on the unrolled body effectively re-orders stmts. */
7696 if (!PURE_SLP_STMT (stmt_info
)
7697 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7698 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7699 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7701 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7703 "cannot perform implicit CSE when performing "
7704 "group loads with negative dependence distance\n");
7711 vect_memory_access_type memory_access_type
;
7712 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7713 &memory_access_type
, &gs_info
))
7718 if (memory_access_type
== VMAT_CONTIGUOUS
)
7720 machine_mode vec_mode
= TYPE_MODE (vectype
);
7721 if (!VECTOR_MODE_P (vec_mode
)
7722 || !can_vec_mask_load_store_p (vec_mode
,
7723 TYPE_MODE (mask_vectype
), true))
7726 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7727 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7729 if (dump_enabled_p ())
7730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7731 "unsupported access type for masked load.\n");
7736 if (!vec_stmt
) /* transformation not required. */
7739 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7742 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7743 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7744 memory_access_type
, &gs_info
);
7746 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7747 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7748 slp_node_instance
, slp_node
, cost_vec
);
7753 gcc_assert (memory_access_type
7754 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7756 if (dump_enabled_p ())
7757 dump_printf_loc (MSG_NOTE
, vect_location
,
7758 "transform load. ncopies = %d\n", ncopies
);
7762 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7763 ensure_base_align (dr_info
);
7765 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7767 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
7771 if (memory_access_type
== VMAT_INVARIANT
)
7773 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
7774 /* If we have versioned for aliasing or the loop doesn't
7775 have any data dependencies that would preclude this,
7776 then we are sure this is a loop invariant load and
7777 thus we can insert it on the preheader edge. */
7778 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7779 && !nested_in_vect_loop
7780 && hoist_defs_of_uses (stmt_info
, loop
));
7783 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
7784 if (dump_enabled_p ())
7785 dump_printf_loc (MSG_NOTE
, vect_location
,
7786 "hoisting out of the vectorized loop: %G", stmt
);
7787 scalar_dest
= copy_ssa_name (scalar_dest
);
7788 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
7789 gsi_insert_on_edge_immediate
7790 (loop_preheader_edge (loop
),
7791 gimple_build_assign (scalar_dest
, rhs
));
7793 /* These copies are all equivalent, but currently the representation
7794 requires a separate STMT_VINFO_VEC_STMT for each one. */
7795 prev_stmt_info
= NULL
;
7796 gimple_stmt_iterator gsi2
= *gsi
;
7798 for (j
= 0; j
< ncopies
; j
++)
7800 stmt_vec_info new_stmt_info
;
7803 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7805 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7806 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
7810 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7812 new_stmt_info
= vinfo
->lookup_def (new_temp
);
7815 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
7817 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7819 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7820 prev_stmt_info
= new_stmt_info
;
7825 if (memory_access_type
== VMAT_ELEMENTWISE
7826 || memory_access_type
== VMAT_STRIDED_SLP
)
7828 gimple_stmt_iterator incr_gsi
;
7834 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7835 tree stride_base
, stride_step
, alias_off
;
7836 /* Checked by get_load_store_type. */
7837 unsigned int const_nunits
= nunits
.to_constant ();
7838 unsigned HOST_WIDE_INT cst_offset
= 0;
7840 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7841 gcc_assert (!nested_in_vect_loop
);
7845 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7846 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7850 first_stmt_info
= stmt_info
;
7851 first_dr_info
= dr_info
;
7853 if (slp
&& grouped_load
)
7855 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7856 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7862 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7863 * vect_get_place_in_interleaving_chain (stmt_info
,
7866 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7870 = fold_build_pointer_plus
7871 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7872 size_binop (PLUS_EXPR
,
7873 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7874 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7875 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7877 /* For a load with loop-invariant (but other than power-of-2)
7878 stride (i.e. not a grouped access) like so:
7880 for (i = 0; i < n; i += stride)
7883 we generate a new induction variable and new accesses to
7884 form a new vector (or vectors, depending on ncopies):
7886 for (j = 0; ; j += VF*stride)
7888 tmp2 = array[j + stride];
7890 vectemp = {tmp1, tmp2, ...}
7893 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7894 build_int_cst (TREE_TYPE (stride_step
), vf
));
7896 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7898 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7899 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7900 create_iv (stride_base
, ivstep
, NULL
,
7901 loop
, &incr_gsi
, insert_after
,
7903 incr
= gsi_stmt (incr_gsi
);
7904 loop_vinfo
->add_stmt (incr
);
7906 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7908 prev_stmt_info
= NULL
;
7909 running_off
= offvar
;
7910 alias_off
= build_int_cst (ref_type
, 0);
7911 int nloads
= const_nunits
;
7913 tree ltype
= TREE_TYPE (vectype
);
7914 tree lvectype
= vectype
;
7915 auto_vec
<tree
> dr_chain
;
7916 if (memory_access_type
== VMAT_STRIDED_SLP
)
7918 if (group_size
< const_nunits
)
7920 /* First check if vec_init optab supports construction from
7921 vector elts directly. */
7922 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7924 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7925 && VECTOR_MODE_P (vmode
)
7926 && targetm
.vector_mode_supported_p (vmode
)
7927 && (convert_optab_handler (vec_init_optab
,
7928 TYPE_MODE (vectype
), vmode
)
7929 != CODE_FOR_nothing
))
7931 nloads
= const_nunits
/ group_size
;
7933 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7937 /* Otherwise avoid emitting a constructor of vector elements
7938 by performing the loads using an integer type of the same
7939 size, constructing a vector of those and then
7940 re-interpreting it as the original vector type.
7941 This avoids a huge runtime penalty due to the general
7942 inability to perform store forwarding from smaller stores
7943 to a larger load. */
7945 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7946 unsigned int lnunits
= const_nunits
/ group_size
;
7947 /* If we can't construct such a vector fall back to
7948 element loads of the original vector type. */
7949 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7950 && mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7951 && VECTOR_MODE_P (vmode
)
7952 && targetm
.vector_mode_supported_p (vmode
)
7953 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7954 != CODE_FOR_nothing
))
7958 ltype
= build_nonstandard_integer_type (lsize
, 1);
7959 lvectype
= build_vector_type (ltype
, nloads
);
7966 lnel
= const_nunits
;
7969 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7971 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7972 else if (nloads
== 1)
7977 /* For SLP permutation support we need to load the whole group,
7978 not only the number of vector stmts the permutation result
7982 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7984 unsigned int const_vf
= vf
.to_constant ();
7985 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7986 dr_chain
.create (ncopies
);
7989 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7991 unsigned int group_el
= 0;
7992 unsigned HOST_WIDE_INT
7993 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7994 for (j
= 0; j
< ncopies
; j
++)
7997 vec_alloc (v
, nloads
);
7998 stmt_vec_info new_stmt_info
= NULL
;
7999 for (i
= 0; i
< nloads
; i
++)
8001 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8002 group_el
* elsz
+ cst_offset
);
8003 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8004 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8006 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8008 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8010 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8011 gimple_assign_lhs (new_stmt
));
8015 || group_el
== group_size
)
8017 tree newoff
= copy_ssa_name (running_off
);
8018 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8019 running_off
, stride_step
);
8020 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
8022 running_off
= newoff
;
8028 tree vec_inv
= build_constructor (lvectype
, v
);
8029 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
8030 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8031 if (lvectype
!= vectype
)
8034 = gimple_build_assign (make_ssa_name (vectype
),
8036 build1 (VIEW_CONVERT_EXPR
,
8037 vectype
, new_temp
));
8039 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8046 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
8048 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8053 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8055 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8056 prev_stmt_info
= new_stmt_info
;
8062 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8063 slp_node_instance
, false, &n_perms
);
8068 if (memory_access_type
== VMAT_GATHER_SCATTER
8069 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
8070 grouped_load
= false;
8074 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8075 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8076 /* For SLP vectorization we directly vectorize a subchain
8077 without permutation. */
8078 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8079 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8080 /* For BB vectorization always use the first stmt to base
8081 the data ref pointer on. */
8083 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8085 /* Check if the chain of loads is already vectorized. */
8086 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
8087 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8088 ??? But we can only do so if there is exactly one
8089 as we have no way to get at the rest. Leave the CSE
8091 ??? With the group load eventually participating
8092 in multiple different permutations (having multiple
8093 slp nodes which refer to the same group) the CSE
8094 is even wrong code. See PR56270. */
8097 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8100 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8103 /* VEC_NUM is the number of vect stmts to be created for this group. */
8106 grouped_load
= false;
8107 /* If an SLP permutation is from N elements to N elements,
8108 and if one vector holds a whole number of N, we can load
8109 the inputs to the permutation in the same way as an
8110 unpermuted sequence. In other cases we need to load the
8111 whole group, not only the number of vector stmts the
8112 permutation result fits in. */
8114 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
8115 || !multiple_p (nunits
, group_size
)))
8117 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8118 variable VF; see vect_transform_slp_perm_load. */
8119 unsigned int const_vf
= vf
.to_constant ();
8120 unsigned int const_nunits
= nunits
.to_constant ();
8121 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
8122 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
8126 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8128 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
8132 vec_num
= group_size
;
8134 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8138 first_stmt_info
= stmt_info
;
8139 first_dr_info
= dr_info
;
8140 group_size
= vec_num
= 1;
8142 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8145 alignment_support_scheme
8146 = vect_supportable_dr_alignment (first_dr_info
, false);
8147 gcc_assert (alignment_support_scheme
);
8148 vec_loop_masks
*loop_masks
8149 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8150 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8152 /* Targets with store-lane instructions must not require explicit
8153 realignment. vect_supportable_dr_alignment always returns either
8154 dr_aligned or dr_unaligned_supported for masked operations. */
8155 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8158 || alignment_support_scheme
== dr_aligned
8159 || alignment_support_scheme
== dr_unaligned_supported
);
8161 /* In case the vectorization factor (VF) is bigger than the number
8162 of elements that we can fit in a vectype (nunits), we have to generate
8163 more than one vector stmt - i.e - we need to "unroll" the
8164 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8165 from one copy of the vector stmt to the next, in the field
8166 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8167 stages to find the correct vector defs to be used when vectorizing
8168 stmts that use the defs of the current stmt. The example below
8169 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8170 need to create 4 vectorized stmts):
8172 before vectorization:
8173 RELATED_STMT VEC_STMT
8177 step 1: vectorize stmt S1:
8178 We first create the vector stmt VS1_0, and, as usual, record a
8179 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8180 Next, we create the vector stmt VS1_1, and record a pointer to
8181 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8182 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8184 RELATED_STMT VEC_STMT
8185 VS1_0: vx0 = memref0 VS1_1 -
8186 VS1_1: vx1 = memref1 VS1_2 -
8187 VS1_2: vx2 = memref2 VS1_3 -
8188 VS1_3: vx3 = memref3 - -
8189 S1: x = load - VS1_0
8192 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8193 information we recorded in RELATED_STMT field is used to vectorize
8196 /* In case of interleaving (non-unit grouped access):
8203 Vectorized loads are created in the order of memory accesses
8204 starting from the access of the first stmt of the chain:
8207 VS2: vx1 = &base + vec_size*1
8208 VS3: vx3 = &base + vec_size*2
8209 VS4: vx4 = &base + vec_size*3
8211 Then permutation statements are generated:
8213 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8214 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8217 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8218 (the order of the data-refs in the output of vect_permute_load_chain
8219 corresponds to the order of scalar stmts in the interleaving chain - see
8220 the documentation of vect_permute_load_chain()).
8221 The generation of permutation stmts and recording them in
8222 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8224 In case of both multiple types and interleaving, the vector loads and
8225 permutation stmts above are created for every copy. The result vector
8226 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8227 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8229 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8230 on a target that supports unaligned accesses (dr_unaligned_supported)
8231 we generate the following code:
8235 p = p + indx * vectype_size;
8240 Otherwise, the data reference is potentially unaligned on a target that
8241 does not support unaligned accesses (dr_explicit_realign_optimized) -
8242 then generate the following code, in which the data in each iteration is
8243 obtained by two vector loads, one from the previous iteration, and one
8244 from the current iteration:
8246 msq_init = *(floor(p1))
8247 p2 = initial_addr + VS - 1;
8248 realignment_token = call target_builtin;
8251 p2 = p2 + indx * vectype_size
8253 vec_dest = realign_load (msq, lsq, realignment_token)
8258 /* If the misalignment remains the same throughout the execution of the
8259 loop, we can create the init_addr and permutation mask at the loop
8260 preheader. Otherwise, it needs to be created inside the loop.
8261 This can only occur when vectorizing memory accesses in the inner-loop
8262 nested within an outer-loop that is being vectorized. */
8264 if (nested_in_vect_loop
8265 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
8266 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8268 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8269 compute_in_loop
= true;
8272 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8273 || alignment_support_scheme
== dr_explicit_realign
)
8274 && !compute_in_loop
)
8276 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
8277 alignment_support_scheme
, NULL_TREE
,
8279 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8281 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8282 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8289 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8290 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8293 tree vec_offset
= NULL_TREE
;
8294 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8296 aggr_type
= NULL_TREE
;
8299 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8301 aggr_type
= elem_type
;
8302 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8303 &bump
, &vec_offset
);
8307 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8308 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8310 aggr_type
= vectype
;
8311 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8312 memory_access_type
);
8315 tree vec_mask
= NULL_TREE
;
8316 prev_stmt_info
= NULL
;
8317 poly_uint64 group_elt
= 0;
8318 for (j
= 0; j
< ncopies
; j
++)
8320 stmt_vec_info new_stmt_info
= NULL
;
8321 /* 1. Create the vector or array pointer update chain. */
8324 bool simd_lane_access_p
8325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8326 if (simd_lane_access_p
8327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8329 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8330 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8331 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8332 get_alias_set (TREE_TYPE (ref_type
)))
8333 && (alignment_support_scheme
== dr_aligned
8334 || alignment_support_scheme
== dr_unaligned_supported
))
8336 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8337 dataref_offset
= build_int_cst (ref_type
, 0);
8339 else if (first_stmt_info_for_drptr
8340 && first_stmt_info
!= first_stmt_info_for_drptr
)
8343 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
8344 aggr_type
, at_loop
, offset
, &dummy
,
8345 gsi
, &ptr_incr
, simd_lane_access_p
,
8347 /* Adjust the pointer by the difference to first_stmt. */
8348 data_reference_p ptrdr
8349 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
8351 = fold_convert (sizetype
,
8352 size_binop (MINUS_EXPR
,
8353 DR_INIT (first_dr_info
->dr
),
8355 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8358 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8359 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8360 &dataref_ptr
, &vec_offset
);
8363 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
8364 offset
, &dummy
, gsi
, &ptr_incr
,
8368 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8374 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8376 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8377 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8379 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8382 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8385 if (grouped_load
|| slp_perm
)
8386 dr_chain
.create (vec_num
);
8388 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8392 vec_array
= create_vector_array (vectype
, vec_num
);
8394 tree final_mask
= NULL_TREE
;
8396 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8399 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8406 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8408 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8409 tree alias_ptr
= build_int_cst (ref_type
, align
);
8410 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8411 dataref_ptr
, alias_ptr
,
8417 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8418 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8419 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8421 gimple_call_set_lhs (call
, vec_array
);
8422 gimple_call_set_nothrow (call
, true);
8423 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8425 /* Extract each vector into an SSA_NAME. */
8426 for (i
= 0; i
< vec_num
; i
++)
8428 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
8430 dr_chain
.quick_push (new_temp
);
8433 /* Record the mapping between SSA_NAMEs and statements. */
8434 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
8436 /* Record that VEC_ARRAY is now dead. */
8437 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8441 for (i
= 0; i
< vec_num
; i
++)
8443 tree final_mask
= NULL_TREE
;
8445 && memory_access_type
!= VMAT_INVARIANT
)
8446 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8448 vectype
, vec_num
* j
+ i
);
8450 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8454 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8457 /* 2. Create the vector-load in the loop. */
8458 gimple
*new_stmt
= NULL
;
8459 switch (alignment_support_scheme
)
8462 case dr_unaligned_supported
:
8464 unsigned int misalign
;
8465 unsigned HOST_WIDE_INT align
;
8467 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8469 tree scale
= size_int (gs_info
.scale
);
8472 call
= gimple_build_call_internal
8473 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8474 vec_offset
, scale
, final_mask
);
8476 call
= gimple_build_call_internal
8477 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8479 gimple_call_set_nothrow (call
, true);
8481 data_ref
= NULL_TREE
;
8486 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8487 if (alignment_support_scheme
== dr_aligned
)
8489 gcc_assert (aligned_access_p (first_dr_info
));
8492 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8494 align
= dr_alignment
8495 (vect_dr_behavior (first_dr_info
));
8499 misalign
= DR_MISALIGNMENT (first_dr_info
);
8500 if (dataref_offset
== NULL_TREE
8501 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8502 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8507 align
= least_bit_hwi (misalign
| align
);
8508 tree ptr
= build_int_cst (ref_type
, align
);
8510 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8513 gimple_call_set_nothrow (call
, true);
8515 data_ref
= NULL_TREE
;
8520 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8523 : build_int_cst (ref_type
, 0));
8524 if (alignment_support_scheme
== dr_aligned
)
8526 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8527 TREE_TYPE (data_ref
)
8528 = build_aligned_type (TREE_TYPE (data_ref
),
8529 align
* BITS_PER_UNIT
);
8531 TREE_TYPE (data_ref
)
8532 = build_aligned_type (TREE_TYPE (data_ref
),
8533 TYPE_ALIGN (elem_type
));
8537 case dr_explicit_realign
:
8541 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8543 if (compute_in_loop
)
8544 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
8546 dr_explicit_realign
,
8549 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8550 ptr
= copy_ssa_name (dataref_ptr
);
8552 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8553 // For explicit realign the target alignment should be
8554 // known at compile time.
8555 unsigned HOST_WIDE_INT align
=
8556 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
8557 new_stmt
= gimple_build_assign
8558 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8560 (TREE_TYPE (dataref_ptr
),
8561 -(HOST_WIDE_INT
) align
));
8562 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8564 = build2 (MEM_REF
, vectype
, ptr
,
8565 build_int_cst (ref_type
, 0));
8566 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8567 vec_dest
= vect_create_destination_var (scalar_dest
,
8569 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8570 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8571 gimple_assign_set_lhs (new_stmt
, new_temp
);
8572 gimple_set_vdef (new_stmt
, gimple_vdef (stmt_info
->stmt
));
8573 gimple_set_vuse (new_stmt
, gimple_vuse (stmt_info
->stmt
));
8574 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8577 bump
= size_binop (MULT_EXPR
, vs
,
8578 TYPE_SIZE_UNIT (elem_type
));
8579 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8580 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
8582 new_stmt
= gimple_build_assign
8583 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8585 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8586 ptr
= copy_ssa_name (ptr
, new_stmt
);
8587 gimple_assign_set_lhs (new_stmt
, ptr
);
8588 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8590 = build2 (MEM_REF
, vectype
, ptr
,
8591 build_int_cst (ref_type
, 0));
8594 case dr_explicit_realign_optimized
:
8596 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8597 new_temp
= copy_ssa_name (dataref_ptr
);
8599 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8600 // We should only be doing this if we know the target
8601 // alignment at compile time.
8602 unsigned HOST_WIDE_INT align
=
8603 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
8604 new_stmt
= gimple_build_assign
8605 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8606 build_int_cst (TREE_TYPE (dataref_ptr
),
8607 -(HOST_WIDE_INT
) align
));
8608 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8610 = build2 (MEM_REF
, vectype
, new_temp
,
8611 build_int_cst (ref_type
, 0));
8617 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8618 /* DATA_REF is null if we've already built the statement. */
8621 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8622 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8624 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8625 gimple_set_lhs (new_stmt
, new_temp
);
8627 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8629 /* 3. Handle explicit realignment if necessary/supported.
8631 vec_dest = realign_load (msq, lsq, realignment_token) */
8632 if (alignment_support_scheme
== dr_explicit_realign_optimized
8633 || alignment_support_scheme
== dr_explicit_realign
)
8635 lsq
= gimple_assign_lhs (new_stmt
);
8636 if (!realignment_token
)
8637 realignment_token
= dataref_ptr
;
8638 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8639 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8640 msq
, lsq
, realignment_token
);
8641 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8642 gimple_assign_set_lhs (new_stmt
, new_temp
);
8644 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8646 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8649 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8650 add_phi_arg (phi
, lsq
,
8651 loop_latch_edge (containing_loop
),
8657 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8659 tree perm_mask
= perm_mask_for_reverse (vectype
);
8660 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8661 perm_mask
, stmt_info
, gsi
);
8662 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8665 /* Collect vector loads and later create their permutation in
8666 vect_transform_grouped_load (). */
8667 if (grouped_load
|| slp_perm
)
8668 dr_chain
.quick_push (new_temp
);
8670 /* Store vector loads in the corresponding SLP_NODE. */
8671 if (slp
&& !slp_perm
)
8672 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8674 /* With SLP permutation we load the gaps as well, without
8675 we need to skip the gaps after we manage to fully load
8676 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8677 group_elt
+= nunits
;
8678 if (maybe_ne (group_gap_adj
, 0U)
8680 && known_eq (group_elt
, group_size
- group_gap_adj
))
8682 poly_wide_int bump_val
8683 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8685 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8686 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8691 /* Bump the vector pointer to account for a gap or for excess
8692 elements loaded for a permuted SLP load. */
8693 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8695 poly_wide_int bump_val
8696 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8698 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8699 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8704 if (slp
&& !slp_perm
)
8710 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8711 slp_node_instance
, false,
8714 dr_chain
.release ();
8722 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8723 vect_transform_grouped_load (stmt_info
, dr_chain
,
8725 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8730 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8732 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8733 prev_stmt_info
= new_stmt_info
;
8736 dr_chain
.release ();
8742 /* Function vect_is_simple_cond.
8745 LOOP - the loop that is being vectorized.
8746 COND - Condition that is checked for simple use.
8749 *COMP_VECTYPE - the vector type for the comparison.
8750 *DTS - The def types for the arguments of the comparison
8752 Returns whether a COND can be vectorized. Checks whether
8753 condition operands are supportable using vec_is_simple_use. */
8756 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8757 tree
*comp_vectype
, enum vect_def_type
*dts
,
8761 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8764 if (TREE_CODE (cond
) == SSA_NAME
8765 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8767 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
8769 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8774 if (!COMPARISON_CLASS_P (cond
))
8777 lhs
= TREE_OPERAND (cond
, 0);
8778 rhs
= TREE_OPERAND (cond
, 1);
8780 if (TREE_CODE (lhs
) == SSA_NAME
)
8782 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
8785 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8786 || TREE_CODE (lhs
) == FIXED_CST
)
8787 dts
[0] = vect_constant_def
;
8791 if (TREE_CODE (rhs
) == SSA_NAME
)
8793 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
8796 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8797 || TREE_CODE (rhs
) == FIXED_CST
)
8798 dts
[1] = vect_constant_def
;
8802 if (vectype1
&& vectype2
8803 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8804 TYPE_VECTOR_SUBPARTS (vectype2
)))
8807 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8808 /* Invariant comparison. */
8809 if (! *comp_vectype
&& vectype
)
8811 tree scalar_type
= TREE_TYPE (lhs
);
8812 /* If we can widen the comparison to match vectype do so. */
8813 if (INTEGRAL_TYPE_P (scalar_type
)
8814 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8815 TYPE_SIZE (TREE_TYPE (vectype
))))
8816 scalar_type
= build_nonstandard_integer_type
8817 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8818 TYPE_UNSIGNED (scalar_type
));
8819 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8825 /* vectorizable_condition.
8827 Check if STMT_INFO is conditional modify expression that can be vectorized.
8828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8829 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8832 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
8834 Return true if STMT_INFO is vectorizable in this way. */
8837 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8838 stmt_vec_info
*vec_stmt
, bool for_reduction
,
8839 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
8841 vec_info
*vinfo
= stmt_info
->vinfo
;
8842 tree scalar_dest
= NULL_TREE
;
8843 tree vec_dest
= NULL_TREE
;
8844 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8845 tree then_clause
, else_clause
;
8846 tree comp_vectype
= NULL_TREE
;
8847 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8848 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8851 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8852 enum vect_def_type dts
[4]
8853 = {vect_unknown_def_type
, vect_unknown_def_type
,
8854 vect_unknown_def_type
, vect_unknown_def_type
};
8857 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8858 stmt_vec_info prev_stmt_info
= NULL
;
8860 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8861 vec
<tree
> vec_oprnds0
= vNULL
;
8862 vec
<tree
> vec_oprnds1
= vNULL
;
8863 vec
<tree
> vec_oprnds2
= vNULL
;
8864 vec
<tree
> vec_oprnds3
= vNULL
;
8866 bool masked
= false;
8868 if (for_reduction
&& STMT_SLP_TYPE (stmt_info
))
8871 vect_reduction_type reduction_type
8872 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8873 if (reduction_type
== TREE_CODE_REDUCTION
)
8875 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8878 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8879 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8883 /* FORNOW: not yet supported. */
8884 if (STMT_VINFO_LIVE_P (stmt_info
))
8886 if (dump_enabled_p ())
8887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8888 "value used after loop.\n");
8893 /* Is vectorizable conditional operation? */
8894 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
8898 code
= gimple_assign_rhs_code (stmt
);
8900 if (code
!= COND_EXPR
)
8903 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8904 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8909 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8911 gcc_assert (ncopies
>= 1);
8912 if (for_reduction
&& ncopies
> 1)
8913 return false; /* FORNOW */
8915 cond_expr
= gimple_assign_rhs1 (stmt
);
8916 then_clause
= gimple_assign_rhs2 (stmt
);
8917 else_clause
= gimple_assign_rhs3 (stmt
);
8919 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8920 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
8924 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
8926 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
8929 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8932 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8935 masked
= !COMPARISON_CLASS_P (cond_expr
);
8936 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8938 if (vec_cmp_type
== NULL_TREE
)
8941 cond_code
= TREE_CODE (cond_expr
);
8944 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8945 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8948 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8950 /* Boolean values may have another representation in vectors
8951 and therefore we prefer bit operations over comparison for
8952 them (which also works for scalar masks). We store opcodes
8953 to use in bitop1 and bitop2. Statement is vectorized as
8954 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8955 depending on bitop1 and bitop2 arity. */
8959 bitop1
= BIT_NOT_EXPR
;
8960 bitop2
= BIT_AND_EXPR
;
8963 bitop1
= BIT_NOT_EXPR
;
8964 bitop2
= BIT_IOR_EXPR
;
8967 bitop1
= BIT_NOT_EXPR
;
8968 bitop2
= BIT_AND_EXPR
;
8969 std::swap (cond_expr0
, cond_expr1
);
8972 bitop1
= BIT_NOT_EXPR
;
8973 bitop2
= BIT_IOR_EXPR
;
8974 std::swap (cond_expr0
, cond_expr1
);
8977 bitop1
= BIT_XOR_EXPR
;
8980 bitop1
= BIT_XOR_EXPR
;
8981 bitop2
= BIT_NOT_EXPR
;
8986 cond_code
= SSA_NAME
;
8991 if (bitop1
!= NOP_EXPR
)
8993 machine_mode mode
= TYPE_MODE (comp_vectype
);
8996 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8997 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9000 if (bitop2
!= NOP_EXPR
)
9002 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
9004 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9008 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
9011 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
9012 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
9023 vec_oprnds0
.create (1);
9024 vec_oprnds1
.create (1);
9025 vec_oprnds2
.create (1);
9026 vec_oprnds3
.create (1);
9030 scalar_dest
= gimple_assign_lhs (stmt
);
9031 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9032 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9034 /* Handle cond expr. */
9035 for (j
= 0; j
< ncopies
; j
++)
9037 stmt_vec_info new_stmt_info
= NULL
;
9042 auto_vec
<tree
, 4> ops
;
9043 auto_vec
<vec
<tree
>, 4> vec_defs
;
9046 ops
.safe_push (cond_expr
);
9049 ops
.safe_push (cond_expr0
);
9050 ops
.safe_push (cond_expr1
);
9052 ops
.safe_push (then_clause
);
9053 ops
.safe_push (else_clause
);
9054 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9055 vec_oprnds3
= vec_defs
.pop ();
9056 vec_oprnds2
= vec_defs
.pop ();
9058 vec_oprnds1
= vec_defs
.pop ();
9059 vec_oprnds0
= vec_defs
.pop ();
9066 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
9072 = vect_get_vec_def_for_operand (cond_expr0
,
9073 stmt_info
, comp_vectype
);
9075 = vect_get_vec_def_for_operand (cond_expr1
,
9076 stmt_info
, comp_vectype
);
9078 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
9080 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9081 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
9088 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
9091 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
9093 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
9094 vec_oprnds2
.pop ());
9095 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
9096 vec_oprnds3
.pop ());
9101 vec_oprnds0
.quick_push (vec_cond_lhs
);
9103 vec_oprnds1
.quick_push (vec_cond_rhs
);
9104 vec_oprnds2
.quick_push (vec_then_clause
);
9105 vec_oprnds3
.quick_push (vec_else_clause
);
9108 /* Arguments are ready. Create the new vector stmt. */
9109 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
9111 vec_then_clause
= vec_oprnds2
[i
];
9112 vec_else_clause
= vec_oprnds3
[i
];
9115 vec_compare
= vec_cond_lhs
;
9118 vec_cond_rhs
= vec_oprnds1
[i
];
9119 if (bitop1
== NOP_EXPR
)
9120 vec_compare
= build2 (cond_code
, vec_cmp_type
,
9121 vec_cond_lhs
, vec_cond_rhs
);
9124 new_temp
= make_ssa_name (vec_cmp_type
);
9126 if (bitop1
== BIT_NOT_EXPR
)
9127 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
9131 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
9133 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9134 if (bitop2
== NOP_EXPR
)
9135 vec_compare
= new_temp
;
9136 else if (bitop2
== BIT_NOT_EXPR
)
9138 /* Instead of doing ~x ? y : z do x ? z : y. */
9139 vec_compare
= new_temp
;
9140 std::swap (vec_then_clause
, vec_else_clause
);
9144 vec_compare
= make_ssa_name (vec_cmp_type
);
9146 = gimple_build_assign (vec_compare
, bitop2
,
9147 vec_cond_lhs
, new_temp
);
9148 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9152 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9154 if (!is_gimple_val (vec_compare
))
9156 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
9157 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
9159 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9160 vec_compare
= vec_compare_name
;
9162 gcall
*new_stmt
= gimple_build_call_internal
9163 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
9165 gimple_call_set_lhs (new_stmt
, scalar_dest
);
9166 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
9167 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
9168 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
9171 /* In this case we're moving the definition to later in the
9172 block. That doesn't matter because the only uses of the
9173 lhs are in phi statements. */
9174 gimple_stmt_iterator old_gsi
9175 = gsi_for_stmt (stmt_info
->stmt
);
9176 gsi_remove (&old_gsi
, true);
9178 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9183 new_temp
= make_ssa_name (vec_dest
);
9185 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
9186 vec_then_clause
, vec_else_clause
);
9188 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9191 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9198 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9200 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9202 prev_stmt_info
= new_stmt_info
;
9205 vec_oprnds0
.release ();
9206 vec_oprnds1
.release ();
9207 vec_oprnds2
.release ();
9208 vec_oprnds3
.release ();
9213 /* vectorizable_comparison.
9215 Check if STMT_INFO is comparison expression that can be vectorized.
9216 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9217 comparison, put it in VEC_STMT, and insert it at GSI.
9219 Return true if STMT_INFO is vectorizable in this way. */
9222 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9223 stmt_vec_info
*vec_stmt
,
9224 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9226 vec_info
*vinfo
= stmt_info
->vinfo
;
9227 tree lhs
, rhs1
, rhs2
;
9228 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9229 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9230 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
9232 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9233 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
9237 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9238 stmt_vec_info prev_stmt_info
= NULL
;
9240 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9241 vec
<tree
> vec_oprnds0
= vNULL
;
9242 vec
<tree
> vec_oprnds1
= vNULL
;
9246 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9249 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
9252 mask_type
= vectype
;
9253 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9258 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9260 gcc_assert (ncopies
>= 1);
9261 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
9264 if (STMT_VINFO_LIVE_P (stmt_info
))
9266 if (dump_enabled_p ())
9267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9268 "value used after loop.\n");
9272 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9276 code
= gimple_assign_rhs_code (stmt
);
9278 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9281 rhs1
= gimple_assign_rhs1 (stmt
);
9282 rhs2
= gimple_assign_rhs2 (stmt
);
9284 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
9287 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
9290 if (vectype1
&& vectype2
9291 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9292 TYPE_VECTOR_SUBPARTS (vectype2
)))
9295 vectype
= vectype1
? vectype1
: vectype2
;
9297 /* Invariant comparison. */
9300 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9301 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9304 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9307 /* Can't compare mask and non-mask types. */
9308 if (vectype1
&& vectype2
9309 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
9312 /* Boolean values may have another representation in vectors
9313 and therefore we prefer bit operations over comparison for
9314 them (which also works for scalar masks). We store opcodes
9315 to use in bitop1 and bitop2. Statement is vectorized as
9316 BITOP2 (rhs1 BITOP1 rhs2) or
9317 rhs1 BITOP2 (BITOP1 rhs2)
9318 depending on bitop1 and bitop2 arity. */
9319 bool swap_p
= false;
9320 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9322 if (code
== GT_EXPR
)
9324 bitop1
= BIT_NOT_EXPR
;
9325 bitop2
= BIT_AND_EXPR
;
9327 else if (code
== GE_EXPR
)
9329 bitop1
= BIT_NOT_EXPR
;
9330 bitop2
= BIT_IOR_EXPR
;
9332 else if (code
== LT_EXPR
)
9334 bitop1
= BIT_NOT_EXPR
;
9335 bitop2
= BIT_AND_EXPR
;
9338 else if (code
== LE_EXPR
)
9340 bitop1
= BIT_NOT_EXPR
;
9341 bitop2
= BIT_IOR_EXPR
;
9346 bitop1
= BIT_XOR_EXPR
;
9347 if (code
== EQ_EXPR
)
9348 bitop2
= BIT_NOT_EXPR
;
9354 if (bitop1
== NOP_EXPR
)
9356 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
9361 machine_mode mode
= TYPE_MODE (vectype
);
9364 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9365 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9368 if (bitop2
!= NOP_EXPR
)
9370 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9371 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9376 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9377 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9378 dts
, ndts
, slp_node
, cost_vec
);
9385 vec_oprnds0
.create (1);
9386 vec_oprnds1
.create (1);
9390 lhs
= gimple_assign_lhs (stmt
);
9391 mask
= vect_create_destination_var (lhs
, mask_type
);
9393 /* Handle cmp expr. */
9394 for (j
= 0; j
< ncopies
; j
++)
9396 stmt_vec_info new_stmt_info
= NULL
;
9401 auto_vec
<tree
, 2> ops
;
9402 auto_vec
<vec
<tree
>, 2> vec_defs
;
9404 ops
.safe_push (rhs1
);
9405 ops
.safe_push (rhs2
);
9406 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9407 vec_oprnds1
= vec_defs
.pop ();
9408 vec_oprnds0
= vec_defs
.pop ();
9410 std::swap (vec_oprnds0
, vec_oprnds1
);
9414 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
9416 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
9422 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
9423 vec_oprnds0
.pop ());
9424 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
9425 vec_oprnds1
.pop ());
9431 std::swap (vec_rhs1
, vec_rhs2
);
9432 vec_oprnds0
.quick_push (vec_rhs1
);
9433 vec_oprnds1
.quick_push (vec_rhs2
);
9436 /* Arguments are ready. Create the new vector stmt. */
9437 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9439 vec_rhs2
= vec_oprnds1
[i
];
9441 new_temp
= make_ssa_name (mask
);
9442 if (bitop1
== NOP_EXPR
)
9444 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
9445 vec_rhs1
, vec_rhs2
);
9447 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9452 if (bitop1
== BIT_NOT_EXPR
)
9453 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9455 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9458 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9459 if (bitop2
!= NOP_EXPR
)
9461 tree res
= make_ssa_name (mask
);
9462 if (bitop2
== BIT_NOT_EXPR
)
9463 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9465 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9468 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9472 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9479 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9481 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9483 prev_stmt_info
= new_stmt_info
;
9486 vec_oprnds0
.release ();
9487 vec_oprnds1
.release ();
9492 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9493 can handle all live statements in the node. Otherwise return true
9494 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9495 GSI and VEC_STMT are as for vectorizable_live_operation. */
9498 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9499 slp_tree slp_node
, stmt_vec_info
*vec_stmt
,
9500 stmt_vector_for_cost
*cost_vec
)
9504 stmt_vec_info slp_stmt_info
;
9506 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
9508 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9509 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
, i
,
9510 vec_stmt
, cost_vec
))
9514 else if (STMT_VINFO_LIVE_P (stmt_info
)
9515 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
, -1,
9516 vec_stmt
, cost_vec
))
9522 /* Make sure the statement is vectorizable. */
9525 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
9526 slp_tree node
, slp_instance node_instance
,
9527 stmt_vector_for_cost
*cost_vec
)
9529 vec_info
*vinfo
= stmt_info
->vinfo
;
9530 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9531 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9533 gimple_seq pattern_def_seq
;
9535 if (dump_enabled_p ())
9536 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
9539 if (gimple_has_volatile_ops (stmt_info
->stmt
))
9540 return opt_result::failure_at (stmt_info
->stmt
,
9542 " stmt has volatile operands: %G\n",
9545 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9547 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9549 gimple_stmt_iterator si
;
9551 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9553 stmt_vec_info pattern_def_stmt_info
9554 = vinfo
->lookup_stmt (gsi_stmt (si
));
9555 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
9556 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
9558 /* Analyze def stmt of STMT if it's a pattern stmt. */
9559 if (dump_enabled_p ())
9560 dump_printf_loc (MSG_NOTE
, vect_location
,
9561 "==> examining pattern def statement: %G",
9562 pattern_def_stmt_info
->stmt
);
9565 = vect_analyze_stmt (pattern_def_stmt_info
,
9566 need_to_vectorize
, node
, node_instance
,
9574 /* Skip stmts that do not need to be vectorized. In loops this is expected
9576 - the COND_EXPR which is the loop exit condition
9577 - any LABEL_EXPRs in the loop
9578 - computations that are used only for array indexing or loop control.
9579 In basic blocks we only analyze statements that are a part of some SLP
9580 instance, therefore, all the statements are relevant.
9582 Pattern statement needs to be analyzed instead of the original statement
9583 if the original statement is not relevant. Otherwise, we analyze both
9584 statements. In basic blocks we are called from some SLP instance
9585 traversal, don't analyze pattern stmts instead, the pattern stmts
9586 already will be part of SLP instance. */
9588 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
9589 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9590 && !STMT_VINFO_LIVE_P (stmt_info
))
9592 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9593 && pattern_stmt_info
9594 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9595 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9597 /* Analyze PATTERN_STMT instead of the original stmt. */
9598 stmt_info
= pattern_stmt_info
;
9599 if (dump_enabled_p ())
9600 dump_printf_loc (MSG_NOTE
, vect_location
,
9601 "==> examining pattern statement: %G",
9606 if (dump_enabled_p ())
9607 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9609 return opt_result::success ();
9612 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9614 && pattern_stmt_info
9615 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9616 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9618 /* Analyze PATTERN_STMT too. */
9619 if (dump_enabled_p ())
9620 dump_printf_loc (MSG_NOTE
, vect_location
,
9621 "==> examining pattern statement: %G",
9622 pattern_stmt_info
->stmt
);
9625 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
9626 node_instance
, cost_vec
);
9631 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9633 case vect_internal_def
:
9636 case vect_reduction_def
:
9637 case vect_nested_cycle
:
9638 gcc_assert (!bb_vinfo
9639 && (relevance
== vect_used_in_outer
9640 || relevance
== vect_used_in_outer_by_reduction
9641 || relevance
== vect_used_by_reduction
9642 || relevance
== vect_unused_in_scope
9643 || relevance
== vect_used_only_live
));
9646 case vect_induction_def
:
9647 gcc_assert (!bb_vinfo
);
9650 case vect_constant_def
:
9651 case vect_external_def
:
9652 case vect_unknown_def_type
:
9657 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9659 tree type
= gimple_expr_type (stmt_info
->stmt
);
9660 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
9661 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9662 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9663 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
9664 *need_to_vectorize
= true;
9667 if (PURE_SLP_STMT (stmt_info
) && !node
)
9669 if (dump_enabled_p ())
9670 dump_printf_loc (MSG_NOTE
, vect_location
,
9671 "handled only by SLP analysis\n");
9672 return opt_result::success ();
9677 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9678 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9679 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9680 -mveclibabi= takes preference over library functions with
9681 the simd attribute. */
9682 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9683 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9685 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9686 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9687 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9688 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9690 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9691 || vectorizable_reduction (stmt_info
, NULL
, NULL
, node
,
9692 node_instance
, cost_vec
)
9693 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9694 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9695 || vectorizable_condition (stmt_info
, NULL
, NULL
, false, node
,
9697 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
9702 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9703 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9705 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
9707 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9708 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9709 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
9711 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9713 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9714 || vectorizable_condition (stmt_info
, NULL
, NULL
, false, node
,
9716 || vectorizable_comparison (stmt_info
, NULL
, NULL
, node
,
9721 return opt_result::failure_at (stmt_info
->stmt
,
9723 " relevant stmt not supported: %G",
9726 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9727 need extra handling, except for vectorizable reductions. */
9729 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9730 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, NULL
, cost_vec
))
9731 return opt_result::failure_at (stmt_info
->stmt
,
9733 " live stmt not supported: %G",
9736 return opt_result::success ();
9740 /* Function vect_transform_stmt.
9742 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9745 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9746 slp_tree slp_node
, slp_instance slp_node_instance
)
9748 vec_info
*vinfo
= stmt_info
->vinfo
;
9749 bool is_store
= false;
9750 stmt_vec_info vec_stmt
= NULL
;
9753 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9754 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
9756 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9757 && nested_in_vect_loop_p
9758 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9761 gimple
*stmt
= stmt_info
->stmt
;
9762 switch (STMT_VINFO_TYPE (stmt_info
))
9764 case type_demotion_vec_info_type
:
9765 case type_promotion_vec_info_type
:
9766 case type_conversion_vec_info_type
:
9767 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9772 case induc_vec_info_type
:
9773 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9778 case shift_vec_info_type
:
9779 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9783 case op_vec_info_type
:
9784 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9789 case assignment_vec_info_type
:
9790 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9795 case load_vec_info_type
:
9796 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9797 slp_node_instance
, NULL
);
9801 case store_vec_info_type
:
9802 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9804 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9806 /* In case of interleaving, the whole chain is vectorized when the
9807 last store in the chain is reached. Store stmts before the last
9808 one are skipped, and there vec_stmt_info shouldn't be freed
9810 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9811 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
9818 case condition_vec_info_type
:
9819 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, false,
9824 case comparison_vec_info_type
:
9825 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
,
9830 case call_vec_info_type
:
9831 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9832 stmt
= gsi_stmt (*gsi
);
9835 case call_simd_clone_vec_info_type
:
9836 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
9838 stmt
= gsi_stmt (*gsi
);
9841 case reduc_vec_info_type
:
9842 done
= vectorizable_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9843 slp_node_instance
, NULL
);
9848 if (!STMT_VINFO_LIVE_P (stmt_info
))
9850 if (dump_enabled_p ())
9851 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9852 "stmt not supported.\n");
9857 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9858 This would break hybrid SLP vectorization. */
9860 gcc_assert (!vec_stmt
9861 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
9863 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9864 is being vectorized, but outside the immediately enclosing loop. */
9867 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9868 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9869 || STMT_VINFO_RELEVANT (stmt_info
) ==
9870 vect_used_in_outer_by_reduction
))
9872 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9873 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9874 imm_use_iterator imm_iter
;
9875 use_operand_p use_p
;
9878 if (dump_enabled_p ())
9879 dump_printf_loc (MSG_NOTE
, vect_location
,
9880 "Record the vdef for outer-loop vectorization.\n");
9882 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9883 (to be used when vectorizing outer-loop stmts that use the DEF of
9885 if (gimple_code (stmt
) == GIMPLE_PHI
)
9886 scalar_dest
= PHI_RESULT (stmt
);
9888 scalar_dest
= gimple_get_lhs (stmt
);
9890 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9891 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9893 stmt_vec_info exit_phi_info
9894 = vinfo
->lookup_stmt (USE_STMT (use_p
));
9895 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
9899 /* Handle stmts whose DEF is used outside the loop-nest that is
9900 being vectorized. */
9901 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9903 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
, &vec_stmt
,
9909 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9915 /* Remove a group of stores (for SLP or interleaving), free their
9919 vect_remove_stores (stmt_vec_info first_stmt_info
)
9921 vec_info
*vinfo
= first_stmt_info
->vinfo
;
9922 stmt_vec_info next_stmt_info
= first_stmt_info
;
9924 while (next_stmt_info
)
9926 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9927 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
9928 /* Free the attached stmt_vec_info and remove the stmt. */
9929 vinfo
->remove_stmt (next_stmt_info
);
9930 next_stmt_info
= tmp
;
9934 /* Function get_vectype_for_scalar_type_and_size.
9936 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9940 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9942 tree orig_scalar_type
= scalar_type
;
9943 scalar_mode inner_mode
;
9944 machine_mode simd_mode
;
9948 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9949 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9952 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9954 /* For vector types of elements whose mode precision doesn't
9955 match their types precision we use a element type of mode
9956 precision. The vectorization routines will have to make sure
9957 they support the proper result truncation/extension.
9958 We also make sure to build vector types with INTEGER_TYPE
9959 component type only. */
9960 if (INTEGRAL_TYPE_P (scalar_type
)
9961 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9962 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9963 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9964 TYPE_UNSIGNED (scalar_type
));
9966 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9967 When the component mode passes the above test simply use a type
9968 corresponding to that mode. The theory is that any use that
9969 would cause problems with this will disable vectorization anyway. */
9970 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9971 && !INTEGRAL_TYPE_P (scalar_type
))
9972 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9974 /* We can't build a vector type of elements with alignment bigger than
9976 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9977 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9978 TYPE_UNSIGNED (scalar_type
));
9980 /* If we felt back to using the mode fail if there was
9981 no scalar type for it. */
9982 if (scalar_type
== NULL_TREE
)
9985 /* If no size was supplied use the mode the target prefers. Otherwise
9986 lookup a vector mode of the specified size. */
9987 if (known_eq (size
, 0U))
9988 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9989 else if (!multiple_p (size
, nbytes
, &nunits
)
9990 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9992 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9993 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9996 vectype
= build_vector_type (scalar_type
, nunits
);
9998 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9999 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
10002 /* Re-attach the address-space qualifier if we canonicalized the scalar
10004 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
10005 return build_qualified_type
10006 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
10011 poly_uint64 current_vector_size
;
10013 /* Function get_vectype_for_scalar_type.
10015 Returns the vector type corresponding to SCALAR_TYPE as supported
10019 get_vectype_for_scalar_type (tree scalar_type
)
10022 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
10023 current_vector_size
);
10025 && known_eq (current_vector_size
, 0U))
10026 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
10030 /* Function get_mask_type_for_scalar_type.
10032 Returns the mask type corresponding to a result of comparison
10033 of vectors of specified SCALAR_TYPE as supported by target. */
10036 get_mask_type_for_scalar_type (tree scalar_type
)
10038 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
10043 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
10044 current_vector_size
);
10047 /* Function get_same_sized_vectype
10049 Returns a vector type corresponding to SCALAR_TYPE of size
10050 VECTOR_TYPE if supported by the target. */
10053 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
10055 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10056 return build_same_sized_truth_vector_type (vector_type
);
10058 return get_vectype_for_scalar_type_and_size
10059 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
10062 /* Function vect_is_simple_use.
10065 VINFO - the vect info of the loop or basic block that is being vectorized.
10066 OPERAND - operand in the loop or bb.
10068 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10069 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10070 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10071 the definition could be anywhere in the function
10072 DT - the type of definition
10074 Returns whether a stmt with OPERAND can be vectorized.
10075 For loops, supportable operands are constants, loop invariants, and operands
10076 that are defined by the current iteration of the loop. Unsupportable
10077 operands are those that are defined by a previous iteration of the loop (as
10078 is the case in reduction/induction computations).
10079 For basic blocks, supportable operands are constants and bb invariants.
10080 For now, operands defined outside the basic block are not supported. */
/* Classify how OPERAND is defined for the vectorizer and record the
   classification in *DT; optionally also return the defining stmt via
   DEF_STMT_INFO_OUT / DEF_STMT_OUT (each may be NULL).  Visible
   classifications: constant, external (invariant / default def), or
   the def type recorded on the defining stmt's stmt_vec_info.
   NOTE(review): this extracted chunk is missing physical lines
   (braces, returns, some statements) — verify against the upstream
   tree-vect-stmts.c before editing further.  */
10083 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10084 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
/* Initialize all outputs before any early exit.  */
10086 if (def_stmt_info_out
)
10087 *def_stmt_info_out
= NULL
;
10089 *def_stmt_out
= NULL
;
10090 *dt
= vect_unknown_def_type
;
/* Trace the operand (and, for non-default SSA names, its defining
   stmt) into the dump file.  */
10092 if (dump_enabled_p ())
10094 dump_printf_loc (MSG_NOTE
, vect_location
,
10095 "vect_is_simple_use: operand ");
10096 if (TREE_CODE (operand
) == SSA_NAME
10097 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
10098 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
10100 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
/* Cheap classifications that need no stmt lookup: literal constants,
   other gimple invariants, non-SSA operands, and SSA default defs.  */
10103 if (CONSTANT_CLASS_P (operand
))
10104 *dt
= vect_constant_def
;
10105 else if (is_gimple_min_invariant (operand
))
10106 *dt
= vect_external_def
;
10107 else if (TREE_CODE (operand
) != SSA_NAME
)
10108 *dt
= vect_unknown_def_type
;
10109 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
10110 *dt
= vect_external_def
;
/* Otherwise OPERAND is an SSA name defined by a stmt: look up its
   stmt_vec_info in VINFO.  */
10113 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
10114 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
/* A def with no vect info is external to the vectorized region.  */
10116 *dt
= vect_external_def
;
/* Step from a pattern stmt to the stmt that actually gets vectorized
   and take the def type recorded there.  */
10119 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
10120 def_stmt
= stmt_vinfo
->stmt
;
10121 switch (gimple_code (def_stmt
))
10124 case GIMPLE_ASSIGN
:
10126 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
10129 *dt
= vect_unknown_def_type
;
/* Hand the defining stmt back to the caller on request.  */
10132 if (def_stmt_info_out
)
10133 *def_stmt_info_out
= stmt_vinfo
;
10136 *def_stmt_out
= def_stmt
;
/* Dump the resulting def type, one case per vect_def_type value.  */
10139 if (dump_enabled_p ())
10141 dump_printf (MSG_NOTE
, ", type of def: ");
10144 case vect_uninitialized_def
:
10145 dump_printf (MSG_NOTE
, "uninitialized\n");
10147 case vect_constant_def
:
10148 dump_printf (MSG_NOTE
, "constant\n");
10150 case vect_external_def
:
10151 dump_printf (MSG_NOTE
, "external\n");
10153 case vect_internal_def
:
10154 dump_printf (MSG_NOTE
, "internal\n");
10156 case vect_induction_def
:
10157 dump_printf (MSG_NOTE
, "induction\n");
10159 case vect_reduction_def
:
10160 dump_printf (MSG_NOTE
, "reduction\n");
10162 case vect_double_reduction_def
:
10163 dump_printf (MSG_NOTE
, "double reduction\n");
10165 case vect_nested_cycle
:
10166 dump_printf (MSG_NOTE
, "nested cycle\n");
10168 case vect_unknown_def_type
:
10169 dump_printf (MSG_NOTE
, "unknown\n");
/* An unknown def type means the operand is not supported.  */
10174 if (*dt
== vect_unknown_def_type
)
10176 if (dump_enabled_p ())
10177 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10178 "Unsupported pattern.\n");
10185 /* Function vect_is_simple_use.
10187 Same as vect_is_simple_use but also determines the vector operand
10188 type of OPERAND and stores it to *VECTYPE. If the definition of
10189 OPERAND is vect_uninitialized_def, vect_constant_def or
10190 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10191 is responsible to compute the best suited vector type for the
/* Overload of vect_is_simple_use that additionally computes *VECTYPE:
   the vector type of OPERAND's def for internal/induction/reduction/
   double-reduction/nested-cycle defs, NULL_TREE for uninitialized/
   constant/external defs (caller picks a suitable type; see the
   preceding comment block).
   NOTE(review): extracted chunk is missing physical lines (braces,
   returns) — verify against upstream before editing further.  */
10195 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10196 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
10197 gimple
**def_stmt_out
)
10199 stmt_vec_info def_stmt_info
;
/* Delegate the classification itself to the basic overload.  */
10201 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
/* Forward the defining stmt to the caller on request.  */
10205 *def_stmt_out
= def_stmt
;
10206 if (def_stmt_info_out
)
10207 *def_stmt_info_out
= def_stmt_info
;
10209 /* Now get a vector type if the def is internal, otherwise supply
10210 NULL_TREE and leave it up to the caller to figure out a proper
10211 type for the use stmt. */
10212 if (*dt
== vect_internal_def
10213 || *dt
== vect_induction_def
10214 || *dt
== vect_reduction_def
10215 || *dt
== vect_double_reduction_def
10216 || *dt
== vect_nested_cycle
)
/* These def types always carry a vectype on their stmt_vec_info.  */
10218 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
10219 gcc_assert (*vectype
!= NULL_TREE
);
10220 if (dump_enabled_p ())
10221 dump_printf_loc (MSG_NOTE
, vect_location
,
10222 "vect_is_simple_use: vectype %T\n", *vectype
);
/* Defs not produced inside the vectorized region get no vectype.  */
10224 else if (*dt
== vect_uninitialized_def
10225 || *dt
== vect_constant_def
10226 || *dt
== vect_external_def
)
10227 *vectype
= NULL_TREE
;
/* Any other def type here would be a logic error.  */
10229 gcc_unreachable ();
10235 /* Function supportable_widening_operation
10237 Check whether an operation represented by the code CODE is a
10238 widening operation that is supported by the target platform in
10239 vector form (i.e., when operating on arguments of type VECTYPE_IN
10240 producing a result of type VECTYPE_OUT).
10242 Widening operations we currently support are NOP (CONVERT), FLOAT,
10243 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10244 are supported by the target platform either directly (via vector
10245 tree-codes), or via target builtins.
10248 - CODE1 and CODE2 are codes of vector operations to be used when
10249 vectorizing the operation, if available.
10250 - MULTI_STEP_CVT determines the number of required intermediate steps in
10251 case of multi-step conversion (like char->short->int - in that case
10252 MULTI_STEP_CVT will be 1).
10253 - INTERM_TYPES contains the intermediate type required to perform the
10254 widening operation (short in the above example). */
/* Decide whether widening operation CODE (on VECTYPE_IN, producing
   VECTYPE_OUT) is supported by the target, possibly as a multi-step
   conversion; outputs CODE1/CODE2, MULTI_STEP_CVT and INTERM_TYPES as
   described in the preceding comment block.
   NOTE(review): this extracted chunk is missing physical lines
   (braces, switch header, returns) — verify against the upstream
   tree-vect-stmts.c before editing further.  */
10257 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
10258 tree vectype_out
, tree vectype_in
,
10259 enum tree_code
*code1
, enum tree_code
*code2
,
10260 int *multi_step_cvt
,
10261 vec
<tree
> *interm_types
)
10263 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10264 struct loop
*vect_loop
= NULL
;
10265 machine_mode vec_mode
;
10266 enum insn_code icode1
, icode2
;
10267 optab optab1
, optab2
;
10268 tree vectype
= vectype_in
;
10269 tree wide_vectype
= vectype_out
;
10270 enum tree_code c1
, c2
;
10272 tree prev_type
, intermediate_type
;
10273 machine_mode intermediate_mode
, prev_mode
;
10274 optab optab3
, optab4
;
/* Assume a single-step conversion until proven otherwise.  */
10276 *multi_step_cvt
= 0;
10278 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map CODE to the LO/HI (or EVEN/ODD) vector tree-code pair.  */
10282 case WIDEN_MULT_EXPR
:
10283 /* The result of a vectorized widening operation usually requires
10284 two vectors (because the widened results do not fit into one vector).
10285 The generated vector results would normally be expected to be
10286 generated in the same order as in the original scalar computation,
10287 i.e. if 8 results are generated in each vector iteration, they are
10288 to be organized as follows:
10289 vect1: [res1,res2,res3,res4],
10290 vect2: [res5,res6,res7,res8].
10292 However, in the special case that the result of the widening
10293 operation is used in a reduction computation only, the order doesn't
10294 matter (because when vectorizing a reduction we change the order of
10295 the computation). Some targets can take advantage of this and
10296 generate more efficient code. For example, targets like Altivec,
10297 that support widen_mult using a sequence of {mult_even,mult_odd}
10298 generate the following vectors:
10299 vect1: [res1,res3,res5,res7],
10300 vect2: [res2,res4,res6,res8].
10302 When vectorizing outer-loops, we execute the inner-loop sequentially
10303 (each vectorized inner-loop iteration contributes to VF outer-loop
10304 iterations in parallel). We therefore don't allow to change the
10305 order of the computation in the inner-loop during outer-loop
10307 /* TODO: Another case in which order doesn't *really* matter is when we
10308 widen and then contract again, e.g. (short)((int)x * y >> 8).
10309 Normally, pack_trunc performs an even/odd permute, whereas the
10310 repack from an even/odd expansion would be an interleave, which
10311 would be significantly simpler for e.g. AVX2. */
10312 /* In any case, in order to avoid duplicating the code below, recurse
10313 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10314 are properly set up for the caller. If we fail, we'll continue with
10315 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10317 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
10318 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
10319 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
10320 stmt_info
, vectype_out
,
10321 vectype_in
, code1
, code2
,
10322 multi_step_cvt
, interm_types
))
10324 /* Elements in a vector with vect_used_by_reduction property cannot
10325 be reordered if the use chain with this property does not have the
10326 same operation. One such an example is s += a * b, where elements
10327 in a and b cannot be reordered. Here we check if the vector defined
10328 by STMT is only directly used in the reduction statement. */
10329 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
10330 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
10332 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
/* Fall back to the LO/HI widening multiply pair.  */
10335 c1
= VEC_WIDEN_MULT_LO_EXPR
;
10336 c2
= VEC_WIDEN_MULT_HI_EXPR
;
10339 case DOT_PROD_EXPR
:
10340 c1
= DOT_PROD_EXPR
;
10341 c2
= DOT_PROD_EXPR
;
10349 case VEC_WIDEN_MULT_EVEN_EXPR
:
10350 /* Support the recursion induced just above. */
10351 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
10352 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
10355 case WIDEN_LSHIFT_EXPR
:
10356 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
10357 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
/* Presumably the CONVERT and FLOAT cases — the case labels were lost
   in extraction; confirm against upstream.  */
10361 c1
= VEC_UNPACK_LO_EXPR
;
10362 c2
= VEC_UNPACK_HI_EXPR
;
10366 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
10367 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
10370 case FIX_TRUNC_EXPR
:
10371 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
10372 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
10376 gcc_unreachable ();
/* On big-endian targets LO/HI are swapped (EVEN/ODD is unaffected).  */
10379 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
10380 std::swap (c1
, c2
);
/* Select the optabs implementing C1/C2.  */
10382 if (code
== FIX_TRUNC_EXPR
)
10384 /* The signedness is determined from output operand. */
10385 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10386 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
10388 else if (CONVERT_EXPR_CODE_P (code
)
10389 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
10390 && VECTOR_BOOLEAN_TYPE_P (vectype
)
10391 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
10392 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
10394 /* If the input and result modes are the same, a different optab
10395 is needed where we pass in the number of units in vectype. */
10396 optab1
= vec_unpacks_sbool_lo_optab
;
10397 optab2
= vec_unpacks_sbool_hi_optab
;
10401 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10402 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
/* Both halves of the pair must exist and have an insn pattern.  */
10405 if (!optab1
|| !optab2
)
10408 vec_mode
= TYPE_MODE (vectype
);
10409 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
10410 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step success: both insns already produce WIDE_VECTYPE.  */
10416 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10417 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10419 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
10421 /* For scalar masks we may have different boolean
10422 vector types having the same QImode. Thus we
10423 add additional check for elements number. */
10424 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
10425 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
10429 /* Check if it's a multi-step conversion that can be done using intermediate
10432 prev_type
= vectype
;
10433 prev_mode
= vec_mode
;
/* Only plain conversions can be done in multiple steps.  */
10435 if (!CONVERT_EXPR_CODE_P (code
))
10438 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10439 intermediate steps in promotion sequence. We try
10440 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10442 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10443 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* Each iteration widens by one step through an intermediate type.  */
10445 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10446 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10448 intermediate_type
= vect_halve_mask_nunits (prev_type
);
10449 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10454 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
10455 TYPE_UNSIGNED (prev_type
));
10457 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
10458 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
10459 && intermediate_mode
== prev_mode
10460 && SCALAR_INT_MODE_P (prev_mode
))
10462 /* If the input and result modes are the same, a different optab
10463 is needed where we pass in the number of units in vectype. */
10464 optab3
= vec_unpacks_sbool_lo_optab
;
10465 optab4
= vec_unpacks_sbool_hi_optab
;
10469 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10470 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Every step of the chain must be implemented by the target.  */
10473 if (!optab3
|| !optab4
10474 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
10475 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10476 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
10477 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
10478 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
10479 == CODE_FOR_nothing
)
10480 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
10481 == CODE_FOR_nothing
))
/* Record this intermediate step.  */
10484 interm_types
->quick_push (intermediate_type
);
10485 (*multi_step_cvt
)++;
/* Done once the step's result mode matches WIDE_VECTYPE.  */
10487 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10488 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10490 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
10492 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
10493 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
10497 prev_type
= intermediate_type
;
10498 prev_mode
= intermediate_mode
;
/* Ran out of steps without reaching WIDE_VECTYPE: give up.  */
10501 interm_types
->release ();
10506 /* Function supportable_narrowing_operation
10508 Check whether an operation represented by the code CODE is a
10509 narrowing operation that is supported by the target platform in
10510 vector form (i.e., when operating on arguments of type VECTYPE_IN
10511 and producing a result of type VECTYPE_OUT).
10513 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10514 and FLOAT. This function checks if these operations are supported by
10515 the target platform directly via vector tree-codes.
10518 - CODE1 is the code of a vector operation to be used when
10519 vectorizing the operation, if available.
10520 - MULTI_STEP_CVT determines the number of required intermediate steps in
10521 case of multi-step conversion (like int->short->char - in that case
10522 MULTI_STEP_CVT will be 1).
10523 - INTERM_TYPES contains the intermediate type required to perform the
10524 narrowing operation (short in the above example). */
/* Decide whether narrowing operation CODE (on VECTYPE_IN, producing
   VECTYPE_OUT) is supported by the target, possibly as a multi-step
   conversion; outputs CODE1, MULTI_STEP_CVT and INTERM_TYPES as
   described in the preceding comment block.
   NOTE(review): this extracted chunk is missing physical lines
   (braces, switch header, returns, some declarations such as `uns'
   and `i') — verify against upstream before editing further.  */
10527 supportable_narrowing_operation (enum tree_code code
,
10528 tree vectype_out
, tree vectype_in
,
10529 enum tree_code
*code1
, int *multi_step_cvt
,
10530 vec
<tree
> *interm_types
)
10532 machine_mode vec_mode
;
10533 enum insn_code icode1
;
10534 optab optab1
, interm_optab
;
10535 tree vectype
= vectype_in
;
10536 tree narrow_vectype
= vectype_out
;
10538 tree intermediate_type
, prev_type
;
10539 machine_mode intermediate_mode
, prev_mode
;
/* Assume a single-step conversion until proven otherwise.  */
10543 *multi_step_cvt
= 0;
/* Map CODE to the packing tree-code and its optab; the CONVERT case
   label appears to have been dropped by extraction.  */
10547 c1
= VEC_PACK_TRUNC_EXPR
;
10548 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
10549 && VECTOR_BOOLEAN_TYPE_P (vectype
)
10550 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
10551 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
/* Same-mode boolean packs use the subparts-parameterized optab.  */
10552 optab1
= vec_pack_sbool_trunc_optab
;
10554 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10557 case FIX_TRUNC_EXPR
:
10558 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
10559 /* The signedness is determined from output operand. */
10560 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
/* Presumably the FLOAT_EXPR case — label lost in extraction.  */
10564 c1
= VEC_PACK_FLOAT_EXPR
;
10565 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10569 gcc_unreachable ();
/* The optab must exist and have an insn for the input mode.  */
10575 vec_mode
= TYPE_MODE (vectype
);
10576 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step success: the insn already produces NARROW_VECTYPE.  */
10581 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10583 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
10585 /* For scalar masks we may have different boolean
10586 vector types having the same QImode. Thus we
10587 add additional check for elements number. */
10588 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
10589 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
/* FLOAT conversions are not attempted multi-step here.  */
10593 if (code
== FLOAT_EXPR
)
10596 /* Check if it's a multi-step conversion that can be done using intermediate
10598 prev_mode
= vec_mode
;
10599 prev_type
= vectype
;
/* Pick the signedness used for intermediate integer types.  */
10600 if (code
== FIX_TRUNC_EXPR
)
10601 uns
= TYPE_UNSIGNED (vectype_out
);
10603 uns
= TYPE_UNSIGNED (vectype
);
10605 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10606 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10607 costly than signed. */
10608 if (code
== FIX_TRUNC_EXPR
&& uns
)
10610 enum insn_code icode2
;
10613 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
10615 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10616 if (interm_optab
!= unknown_optab
10617 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
10618 && insn_data
[icode1
].operand
[0].mode
10619 == insn_data
[icode2
].operand
[0].mode
)
/* The signed variant works just as well; switch to it.  */
10622 optab1
= interm_optab
;
10627 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10628 intermediate steps in promotion sequence. We try
10629 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10630 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10631 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* Each iteration narrows by one step through an intermediate type.  */
10633 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10634 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10636 intermediate_type
= vect_double_mask_nunits (prev_type
);
10637 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10642 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
10643 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
10644 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
10645 && intermediate_mode
== prev_mode
10646 && SCALAR_INT_MODE_P (prev_mode
))
10647 interm_optab
= vec_pack_sbool_trunc_optab
;
10650 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Every step of the chain must be implemented by the target.  */
10653 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
10654 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10655 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
10656 == CODE_FOR_nothing
))
/* Record this intermediate step.  */
10659 interm_types
->quick_push (intermediate_type
);
10660 (*multi_step_cvt
)++;
/* Done once the step's result mode matches NARROW_VECTYPE.  */
10662 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10664 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
10666 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
10667 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
10671 prev_mode
= intermediate_mode
;
10672 prev_type
= intermediate_type
;
10673 optab1
= interm_optab
;
/* Ran out of steps without reaching NARROW_VECTYPE: give up.  */
10676 interm_types
->release ();
10680 /* Generate and return a statement that sets vector mask MASK such that
10681 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
/* Build (and per the preceding comment, return) an IFN_WHILE_ULT call
   stmt whose lhs is MASK, comparing START_INDEX against END_INDEX;
   the zero constant of MASK's type is passed as the third argument.
   NOTE(review): the `return' line appears to have been dropped by
   extraction — verify against upstream.  */
10684 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10686 tree cmp_type
= TREE_TYPE (start_index
);
10687 tree mask_type
= TREE_TYPE (mask
);
/* Callers are expected to have checked target support already.  */
10688 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10689 cmp_type
, mask_type
,
10690 OPTIMIZE_FOR_SPEED
));
10691 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10692 start_index
, end_index
,
10693 build_zero_cst (mask_type
));
10694 gimple_call_set_lhs (call
, mask
);
10698 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10699 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
/* Build the inverse of vect_gen_while: emit a WHILE_ULT mask into a
   temporary, append it to SEQ, and return BIT_NOT_EXPR of that mask
   (see the preceding comment block).  */
10702 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10705 tree tmp
= make_ssa_name (mask_type
);
10706 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10707 gimple_seq_add_stmt (seq
, call
);
/* gimple_build appends the BIT_NOT stmt to SEQ and returns its lhs.  */
10708 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
10711 /* Try to compute the vector types required to vectorize STMT_INFO,
10712 returning true on success and false if vectorization isn't possible.
10716 - Set *STMT_VECTYPE_OUT to:
10717 - NULL_TREE if the statement doesn't need to be vectorized;
10718 - boolean_type_node if the statement is a boolean operation whose
10719 vector type can only be determined once all the other vector types
10721 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10723 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10724 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10725 statement does not help to determine the overall number of units. */
/* Compute *STMT_VECTYPE_OUT and *NUNITS_VECTYPE_OUT for STMT_INFO as
   described in the preceding comment block; returns opt_result
   success/failure.
   NOTE(review): this extracted chunk is missing physical lines
   (braces, `else' lines, some arguments) — verify against upstream
   before editing further.  */
10728 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
10729 tree
*stmt_vectype_out
,
10730 tree
*nunits_vectype_out
)
10732 gimple
*stmt
= stmt_info
->stmt
;
/* Default both outputs before any early return.  */
10734 *stmt_vectype_out
= NULL_TREE
;
10735 *nunits_vectype_out
= NULL_TREE
;
/* Stmts without an lhs are only vectorizable in special cases.  */
10737 if (gimple_get_lhs (stmt
) == NULL_TREE
10738 /* MASK_STORE has no lhs, but is ok. */
10739 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10741 if (is_a
<gcall
*> (stmt
))
10743 /* Ignore calls with no lhs. These must be calls to
10744 #pragma omp simd functions, and what vectorization factor
10745 it really needs can't be determined until
10746 vectorizable_simd_clone_call. */
10747 if (dump_enabled_p ())
10748 dump_printf_loc (MSG_NOTE
, vect_location
,
10749 "defer to SIMD clone analysis.\n");
10750 return opt_result::success ();
10753 return opt_result::failure_at (stmt
,
10754 "not vectorized: irregular stmt.%G", stmt
);
/* Stmts that already compute a vector value are not handled.  */
10757 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
10758 return opt_result::failure_at (stmt
,
10759 "not vectorized: vector stmt in loop:%G",
10763 tree scalar_type
= NULL_TREE
;
/* Reuse a previously-computed vectype if one is recorded.  */
10764 if (STMT_VINFO_VECTYPE (stmt_info
))
10765 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* Otherwise derive the scalar type: from the stored value for a
   masked store (argument 3 of IFN_MASK_STORE), else from the lhs.  */
10768 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
10769 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10770 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
10772 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
10774 /* Pure bool ops don't participate in number-of-units computation.
10775 For comparisons use the types being compared. */
10776 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
10777 && is_gimple_assign (stmt
)
10778 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
/* Mark the vectype as deferred; see the function comment.  */
10780 *stmt_vectype_out
= boolean_type_node
;
10782 tree rhs1
= gimple_assign_rhs1 (stmt
);
10783 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10784 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10785 scalar_type
= TREE_TYPE (rhs1
);
10788 if (dump_enabled_p ())
10789 dump_printf_loc (MSG_NOTE
, vect_location
,
10790 "pure bool operation.\n");
10791 return opt_result::success ();
/* Compute the vectype for the derived scalar type.  */
10795 if (dump_enabled_p ())
10796 dump_printf_loc (MSG_NOTE
, vect_location
,
10797 "get vectype for scalar type: %T\n", scalar_type
);
10798 vectype
= get_vectype_for_scalar_type (scalar_type
);
10800 return opt_result::failure_at (stmt
,
10802 " unsupported data-type %T\n",
10805 if (!*stmt_vectype_out
)
10806 *stmt_vectype_out
= vectype
;
10808 if (dump_enabled_p ())
10809 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
10812 /* Don't try to compute scalar types if the stmt produces a boolean
10813 vector; use the existing vector type instead. */
10814 tree nunits_vectype
;
10815 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10816 nunits_vectype
= vectype
;
10819 /* The number of units is set according to the smallest scalar
10820 type (or the largest vector size, but we only support one
10821 vector size per vectorization). */
10822 if (*stmt_vectype_out
!= boolean_type_node
)
10824 HOST_WIDE_INT dummy
;
10825 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
10828 if (dump_enabled_p ())
10829 dump_printf_loc (MSG_NOTE
, vect_location
,
10830 "get vectype for scalar type: %T\n", scalar_type
);
10831 nunits_vectype
= get_vectype_for_scalar_type (scalar_type
);
10833 if (!nunits_vectype
)
10834 return opt_result::failure_at (stmt
,
10835 "not vectorized: unsupported data-type %T\n",
/* Both vector types must occupy the same machine-mode size.  */
10838 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
10839 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
10840 return opt_result::failure_at (stmt
,
10841 "not vectorized: different sized vector "
10842 "types in statement, %T and %T\n",
10843 vectype
, nunits_vectype
);
10845 if (dump_enabled_p ())
10847 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n",
10850 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
10851 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
10852 dump_printf (MSG_NOTE
, "\n");
10855 *nunits_vectype_out
= nunits_vectype
;
10856 return opt_result::success ();
10859 /* Try to determine the correct vector type for STMT_INFO, which is a
10860 statement that produces a scalar boolean result. Return the vector
10861 type on success, otherwise return NULL_TREE. */
/* Determine the vector mask type for STMT_INFO, a statement producing
   a scalar boolean result (see the preceding comment block); returns
   opt_tree success (mask type) or failure.
   NOTE(review): this extracted chunk is missing physical lines
   (braces, `else' lines, conditions, rhs/iter declarations) — verify
   against upstream before editing further.  */
10864 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
10866 gimple
*stmt
= stmt_info
->stmt
;
10867 tree mask_type
= NULL
;
10868 tree vectype
, scalar_type
;
/* For a comparison of non-booleans, the mask type comes straight from
   the compared scalar type.  */
10870 if (is_gimple_assign (stmt
)
10871 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10872 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
10874 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
10875 mask_type
= get_mask_type_for_scalar_type (scalar_type
);
10878 return opt_tree::failure_at (stmt
,
10879 "not vectorized: unsupported mask\n");
/* Otherwise derive the mask type from the stmt's SSA use operands,
   requiring all operands to agree.  */
10885 enum vect_def_type dt
;
10887 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
10889 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
10890 return opt_tree::failure_at (stmt
,
10891 "not vectorized:can't compute mask"
10892 " type for statement, %G", stmt
);
10894 /* No vectype probably means external definition.
10895 Allow it in case there is another operand which
10896 allows to determine mask type. */
/* First operand with a vectype seeds MASK_TYPE ...  */
10901 mask_type
= vectype
;
/* ... later operands must match it in element count ...  */
10902 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
10903 TYPE_VECTOR_SUBPARTS (vectype
)))
10904 return opt_tree::failure_at (stmt
,
10905 "not vectorized: different sized mask"
10906 " types in statement, %T and %T\n",
10907 mask_type
, vectype
);
/* ... and in mask-ness (boolean vector vs. ordinary vector).  */
10908 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
10909 != VECTOR_BOOLEAN_TYPE_P (vectype
))
10910 return opt_tree::failure_at (stmt
,
10911 "not vectorized: mixed mask and "
10912 "nonmask vector types in statement, "
10914 mask_type
, vectype
);
10917 /* We may compare boolean value loaded as vector of integers.
10918 Fix mask_type in such case. */
10920 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
10921 && gimple_code (stmt
) == GIMPLE_ASSIGN
10922 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
10923 mask_type
= build_same_sized_truth_vector_type (mask_type
);
10926 /* No mask_type should mean loop invariant predicate.
10927 This is probably a subject for optimization in if-conversion. */
10929 return opt_tree::failure_at (stmt
,
10930 "not vectorized: can't compute mask type "
10931 "for statement: %G", stmt
);
10933 return opt_tree::success (mask_type
);