1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
,
95 stmt_vec_info stmt_info
, slp_tree node
,
96 tree vectype
, int misalign
,
97 enum vect_cost_model_location where
)
99 if ((kind
== vector_load
|| kind
== unaligned_load
)
100 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
101 kind
= vector_gather_load
;
102 if ((kind
== vector_store
|| kind
== unaligned_store
)
103 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
104 kind
= vector_scatter_store
;
106 stmt_info_for_cost si
107 = { count
, kind
, where
, stmt_info
, node
, vectype
, misalign
};
108 body_cost_vec
->safe_push (si
);
111 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
115 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
116 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
117 tree vectype
, int misalign
,
118 enum vect_cost_model_location where
)
120 return record_stmt_cost (body_cost_vec
, count
, kind
, stmt_info
, NULL
,
121 vectype
, misalign
, where
);
125 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
126 enum vect_cost_for_stmt kind
, slp_tree node
,
127 tree vectype
, int misalign
,
128 enum vect_cost_model_location where
)
130 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, node
,
131 vectype
, misalign
, where
);
135 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
136 enum vect_cost_for_stmt kind
,
137 enum vect_cost_model_location where
)
139 gcc_assert (kind
== cond_branch_taken
|| kind
== cond_branch_not_taken
140 || kind
== scalar_stmt
);
141 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, NULL
,
142 NULL_TREE
, 0, where
);
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
148 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
150 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
160 read_vector_array (vec_info
*vinfo
,
161 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
162 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
164 tree vect_type
, vect
, vect_name
, array_ref
;
167 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
168 vect_type
= TREE_TYPE (TREE_TYPE (array
));
169 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
170 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
171 build_int_cst (size_type_node
, n
),
172 NULL_TREE
, NULL_TREE
);
174 new_stmt
= gimple_build_assign (vect
, array_ref
);
175 vect_name
= make_ssa_name (vect
, new_stmt
);
176 gimple_assign_set_lhs (new_stmt
, vect_name
);
177 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
187 write_vector_array (vec_info
*vinfo
,
188 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
189 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
194 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
195 build_int_cst (size_type_node
, n
),
196 NULL_TREE
, NULL_TREE
);
198 new_stmt
= gimple_build_assign (array_ref
, vect
);
199 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
207 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
211 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
221 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
222 gimple_stmt_iterator
*gsi
, tree var
)
224 tree clobber
= build_clobber (TREE_TYPE (var
));
225 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
226 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
237 enum vect_relevant relevant
, bool live_p
)
239 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
240 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE
, vect_location
,
244 "mark relevant %d, live %d: %G", relevant
, live_p
,
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE
, vect_location
,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info
= stmt_info
;
263 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
265 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
266 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
269 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
270 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
271 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
273 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE
, vect_location
,
278 "already marked relevant/live.\n");
282 worklist
->safe_push (stmt_info
);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
292 loop_vec_info loop_vinfo
)
297 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
301 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
303 enum vect_def_type dt
= vect_uninitialized_def
;
305 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
309 "use not simple.\n");
313 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
332 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
333 enum vect_relevant
*relevant
, bool *live_p
)
335 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
337 imm_use_iterator imm_iter
;
341 *relevant
= vect_unused_in_scope
;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info
->stmt
)
346 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
347 *relevant
= vect_used_in_scope
;
349 /* changing memory. */
350 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
351 if (gimple_vdef (stmt_info
->stmt
)
352 && !gimple_clobber_p (stmt_info
->stmt
))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE
, vect_location
,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant
= vect_used_in_scope
;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
363 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
365 basic_block bb
= gimple_bb (USE_STMT (use_p
));
366 if (!flow_bb_inside_loop_p (loop
, bb
))
368 if (is_gimple_debug (USE_STMT (use_p
)))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE
, vect_location
,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
378 gcc_assert (bb
== single_exit (loop
)->dest
);
385 if (*live_p
&& *relevant
== vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE
, vect_location
,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant
= vect_used_only_live
;
394 return (*live_p
|| *relevant
);
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
404 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info
))
414 /* STMT has a data_ref. FORNOW this means that its of one of
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
428 if (!assign
|| !gimple_assign_copy_p (assign
))
430 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
431 if (call
&& gimple_call_internal_p (call
))
433 internal_fn ifn
= gimple_call_internal_fn (call
);
434 int mask_index
= internal_fn_mask_index (ifn
);
436 && use
== gimple_call_arg (call
, mask_index
))
438 int stored_value_index
= internal_fn_stored_value_index (ifn
);
439 if (stored_value_index
>= 0
440 && use
== gimple_call_arg (call
, stored_value_index
))
442 if (internal_gather_scatter_fn_p (ifn
)
443 && use
== gimple_call_arg (call
, 1))
449 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
451 operand
= gimple_assign_rhs1 (assign
);
452 if (TREE_CODE (operand
) != SSA_NAME
)
463 Function process_use.
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT cause it had already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
490 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
491 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
494 stmt_vec_info dstmt_vinfo
;
495 enum vect_def_type dt
;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
503 return opt_result::failure_at (stmt_vinfo
->stmt
,
505 " unsupported use in stmt.\n");
508 return opt_result::success ();
510 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
511 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
518 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
520 && bb
->loop_father
== def_bb
->loop_father
)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE
, vect_location
,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
536 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE
, vect_location
,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
544 case vect_unused_in_scope
:
545 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
546 vect_used_in_scope
: vect_unused_in_scope
;
549 case vect_used_in_outer_by_reduction
:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
551 relevant
= vect_used_by_reduction
;
554 case vect_used_in_outer
:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
556 relevant
= vect_used_in_scope
;
559 case vect_used_in_scope
:
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
574 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE
, vect_location
,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
582 case vect_unused_in_scope
:
583 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
585 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
588 case vect_used_by_reduction
:
589 case vect_used_only_live
:
590 relevant
= vect_used_in_outer_by_reduction
;
593 case vect_used_in_scope
:
594 relevant
= vect_used_in_outer
;
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
605 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
609 loop_latch_edge (bb
->loop_father
))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE
, vect_location
,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
635 Stmt 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
643 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
644 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
645 unsigned int nbbs
= loop
->num_nodes
;
646 gimple_stmt_iterator si
;
650 enum vect_relevant relevant
;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec
<stmt_vec_info
, 64> worklist
;
656 /* 1. Init worklist. */
657 for (i
= 0; i
< nbbs
; i
++)
660 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
662 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
667 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
668 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
670 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
672 if (is_gimple_debug (gsi_stmt (si
)))
674 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE
, vect_location
,
677 "init: stmt relevant? %G", stmt_info
->stmt
);
679 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
680 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
684 /* 2. Process_worklist */
685 while (worklist
.length () > 0)
690 stmt_vec_info stmt_vinfo
= worklist
.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE
, vect_location
,
693 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
698 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
713 case vect_reduction_def
:
714 gcc_assert (relevant
!= vect_unused_in_scope
);
715 if (relevant
!= vect_unused_in_scope
716 && relevant
!= vect_used_in_scope
717 && relevant
!= vect_used_by_reduction
718 && relevant
!= vect_used_only_live
)
719 return opt_result::failure_at
720 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
723 case vect_nested_cycle
:
724 if (relevant
!= vect_unused_in_scope
725 && relevant
!= vect_used_in_outer_by_reduction
726 && relevant
!= vect_used_in_outer
)
727 return opt_result::failure_at
728 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
731 case vect_double_reduction_def
:
732 if (relevant
!= vect_unused_in_scope
733 && relevant
!= vect_used_by_reduction
734 && relevant
!= vect_used_only_live
)
735 return opt_result::failure_at
736 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
743 if (is_pattern_stmt_p (stmt_vinfo
))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
750 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
751 tree op
= gimple_assign_rhs1 (assign
);
754 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
757 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
758 loop_vinfo
, relevant
, &worklist
, false);
761 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
762 loop_vinfo
, relevant
, &worklist
, false);
767 for (; i
< gimple_num_ops (assign
); i
++)
769 op
= gimple_op (assign
, i
);
770 if (TREE_CODE (op
) == SSA_NAME
)
773 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
780 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
782 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
784 tree arg
= gimple_call_arg (call
, i
);
786 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
794 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
796 tree op
= USE_FROM_PTR (use_p
);
798 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
806 gather_scatter_info gs_info
;
807 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
810 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
831 vect_model_simple_cost (vec_info
*,
832 stmt_vec_info stmt_info
, int ncopies
,
833 enum vect_def_type
*dt
,
836 stmt_vector_for_cost
*cost_vec
,
837 vect_cost_for_stmt kind
= vector_stmt
)
839 int inside_cost
= 0, prologue_cost
= 0;
841 gcc_assert (cost_vec
!= NULL
);
843 /* ??? Somehow we need to fix this at the callers. */
845 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
848 /* Cost the "broadcast" of a scalar operand in to a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
851 for (int i
= 0; i
< ndts
; i
++)
852 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
853 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
854 stmt_info
, 0, vect_prologue
);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
858 stmt_info
, 0, vect_body
);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE
, vect_location
,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
877 enum vect_def_type
*dt
,
878 unsigned int ncopies
, int pwr
,
879 stmt_vector_for_cost
*cost_vec
,
883 int inside_cost
= 0, prologue_cost
= 0;
885 for (i
= 0; i
< pwr
+ 1; i
++)
887 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
889 ? vector_stmt
: vec_promote_demote
,
890 stmt_info
, 0, vect_body
);
894 /* FORNOW: Assuming maximum 2 args per stmts. */
895 for (i
= 0; i
< 2; i
++)
896 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
897 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
898 stmt_info
, 0, vect_prologue
);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE
, vect_location
,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
906 /* Returns true if the current function returns DECL. */
909 cfun_returns (tree decl
)
913 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
915 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
918 if (gimple_return_retval (ret
) == decl
)
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
926 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
928 while (gimple_clobber_p (def
));
929 if (is_a
<gassign
*> (def
)
930 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
931 && gimple_assign_rhs1 (def
) == decl
)
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
943 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
944 vect_memory_access_type memory_access_type
,
945 dr_alignment_support alignment_support_scheme
,
947 vec_load_store_type vls_type
, slp_tree slp_node
,
948 stmt_vector_for_cost
*cost_vec
)
950 unsigned int inside_cost
= 0, prologue_cost
= 0;
951 stmt_vec_info first_stmt_info
= stmt_info
;
952 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
954 /* ??? Somehow we need to fix this at the callers. */
956 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
958 if (vls_type
== VLS_STORE_INVARIANT
)
961 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
962 stmt_info
, 0, vect_prologue
);
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node
&& grouped_access_p
)
968 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
980 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
982 /* Uses a high and low interleave or shuffle operations for each
984 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
985 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
986 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
987 stmt_info
, 0, vect_body
);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE
, vect_location
,
991 "vect_model_store_cost: strided group_size = %d .\n",
995 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
996 /* Costs of the stores. */
997 if (memory_access_type
== VMAT_ELEMENTWISE
998 || memory_access_type
== VMAT_GATHER_SCATTER
)
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1002 inside_cost
+= record_stmt_cost (cost_vec
,
1003 ncopies
* assumed_nunits
,
1004 scalar_store
, stmt_info
, 0, vect_body
);
1007 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, alignment_support_scheme
,
1008 misalignment
, &inside_cost
, cost_vec
);
1010 if (memory_access_type
== VMAT_ELEMENTWISE
1011 || memory_access_type
== VMAT_STRIDED_SLP
)
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1015 inside_cost
+= record_stmt_cost (cost_vec
,
1016 ncopies
* assumed_nunits
,
1017 vec_to_scalar
, stmt_info
, 0, vect_body
);
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
1026 && (TREE_CODE (base
) == RESULT_DECL
1027 || (DECL_P (base
) && cfun_returns (base
)))
1028 && !aggregate_value_p (base
, cfun
->decl
))
1030 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1034 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1040 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1042 stmt_info
, 0, vect_epilogue
);
1044 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1046 stmt_info
, 0, vect_epilogue
);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE
, vect_location
,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1058 /* Calculate cost of DR's memory access. */
1060 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1061 dr_alignment_support alignment_support_scheme
,
1063 unsigned int *inside_cost
,
1064 stmt_vector_for_cost
*body_cost_vec
)
1066 switch (alignment_support_scheme
)
1070 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1071 vector_store
, stmt_info
, 0,
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE
, vect_location
,
1076 "vect_model_store_cost: aligned.\n");
1080 case dr_unaligned_supported
:
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1084 unaligned_store
, stmt_info
,
1085 misalignment
, vect_body
);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE
, vect_location
,
1088 "vect_model_store_cost: unaligned supported by "
1093 case dr_unaligned_unsupported
:
1095 *inside_cost
= VECT_MAX_COST
;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1099 "vect_model_store_cost: unsupported access.\n");
1109 /* Function vect_model_load_cost
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1117 vect_model_load_cost (vec_info
*vinfo
,
1118 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1119 vect_memory_access_type memory_access_type
,
1120 dr_alignment_support alignment_support_scheme
,
1122 gather_scatter_info
*gs_info
,
1124 stmt_vector_for_cost
*cost_vec
)
1126 unsigned int inside_cost
= 0, prologue_cost
= 0;
1127 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1129 gcc_assert (cost_vec
);
1131 /* ??? Somehow we need to fix this at the callers. */
1133 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1135 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms
, n_loads
;
1142 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1143 vf
, true, &n_perms
, &n_loads
);
1144 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1145 first_stmt_info
, 0, vect_body
);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info
= stmt_info
;
1155 if (!slp_node
&& grouped_access_p
)
1156 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p
&& !slp_node
&& memory_access_type
== VMAT_LOAD_STORE_LANES
)
1167 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
1168 stmt_vec_info next_stmt_info
= first_stmt_info
;
1172 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
1174 while (next_stmt_info
);
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE
, vect_location
,
1179 "vect_model_load_cost: %d unused vectors.\n",
1181 vect_get_load_cost (vinfo
, stmt_info
, ncopies
* gaps
,
1182 alignment_support_scheme
, misalignment
, false,
1183 &inside_cost
, &prologue_cost
,
1184 cost_vec
, cost_vec
, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1193 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1195 /* Uses an even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1198 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1199 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1200 stmt_info
, 0, vect_body
);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE
, vect_location
,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1208 /* The loads themselves. */
1209 if (memory_access_type
== VMAT_ELEMENTWISE
1210 || memory_access_type
== VMAT_GATHER_SCATTER
)
1212 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1213 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1214 if (memory_access_type
== VMAT_GATHER_SCATTER
1215 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1219 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1220 vec_to_scalar
, stmt_info
, 0,
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost
+= record_stmt_cost (cost_vec
,
1224 ncopies
* assumed_nunits
,
1225 scalar_load
, stmt_info
, 0, vect_body
);
1227 else if (memory_access_type
== VMAT_INVARIANT
)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1231 scalar_load
, stmt_info
, 0,
1233 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1234 scalar_to_vec
, stmt_info
, 0,
1238 vect_get_load_cost (vinfo
, stmt_info
, ncopies
,
1239 alignment_support_scheme
, misalignment
, first_stmt_p
,
1240 &inside_cost
, &prologue_cost
,
1241 cost_vec
, cost_vec
, true);
1242 if (memory_access_type
== VMAT_ELEMENTWISE
1243 || memory_access_type
== VMAT_STRIDED_SLP
1244 || (memory_access_type
== VMAT_GATHER_SCATTER
1245 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1246 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1247 stmt_info
, 0, vect_body
);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE
, vect_location
,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1256 /* Calculate cost of DR's memory access. */
1258 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1259 dr_alignment_support alignment_support_scheme
,
1261 bool add_realign_cost
, unsigned int *inside_cost
,
1262 unsigned int *prologue_cost
,
1263 stmt_vector_for_cost
*prologue_cost_vec
,
1264 stmt_vector_for_cost
*body_cost_vec
,
1265 bool record_prologue_costs
)
1267 switch (alignment_support_scheme
)
1271 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1272 stmt_info
, 0, vect_body
);
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE
, vect_location
,
1276 "vect_model_load_cost: aligned.\n");
1280 case dr_unaligned_supported
:
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1284 unaligned_load
, stmt_info
,
1285 misalignment
, vect_body
);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE
, vect_location
,
1289 "vect_model_load_cost: unaligned supported by "
1294 case dr_explicit_realign
:
1296 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1297 vector_load
, stmt_info
, 0, vect_body
);
1298 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1299 vec_perm
, stmt_info
, 0, vect_body
);
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1304 if (targetm
.vectorize
.builtin_mask_for_load
)
1305 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1306 stmt_info
, 0, vect_body
);
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE
, vect_location
,
1310 "vect_model_load_cost: explicit realign\n");
1314 case dr_explicit_realign_optimized
:
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE
, vect_location
,
1318 "vect_model_load_cost: unaligned software "
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1328 if (add_realign_cost
&& record_prologue_costs
)
1330 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1331 vector_stmt
, stmt_info
,
1333 if (targetm
.vectorize
.builtin_mask_for_load
)
1334 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1335 vector_stmt
, stmt_info
,
1339 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1340 stmt_info
, 0, vect_body
);
1341 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1342 stmt_info
, 0, vect_body
);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE
, vect_location
,
1346 "vect_model_load_cost: explicit realign optimized"
1352 case dr_unaligned_unsupported
:
1354 *inside_cost
= VECT_MAX_COST
;
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1358 "vect_model_load_cost: unsupported access.\n");
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1371 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1372 gimple_stmt_iterator
*gsi
)
1375 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1377 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE
, vect_location
,
1381 "created new init_stmt: %G", new_stmt
);
1384 /* Function vect_init_vector.
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
1395 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1396 gimple_stmt_iterator
*gsi
)
1401 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1402 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1404 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1405 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1407 /* Scalar boolean value should be transformed into
1408 all zeros or all ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type
))
1411 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1412 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1414 if (CONSTANT_CLASS_P (val
))
1415 val
= integer_zerop (val
) ? false_val
: true_val
;
1418 new_temp
= make_ssa_name (TREE_TYPE (type
));
1419 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1420 val
, true_val
, false_val
);
1421 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1427 gimple_seq stmts
= NULL
;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1429 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1430 TREE_TYPE (type
), val
);
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1435 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1436 !gsi_end_p (gsi2
); )
1438 init_stmt
= gsi_stmt (gsi2
);
1439 gsi_remove (&gsi2
, false);
1440 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1444 val
= build_vector_from_val (type
, val
);
1447 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1448 init_stmt
= gimple_build_assign (new_temp
, val
);
1449 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1454 /* Function vect_get_vec_defs_for_operand.
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
1467 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1469 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1472 enum vect_def_type dt
;
1474 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE
, vect_location
,
1478 "vect_get_vec_defs_for_operand: %T\n", op
);
1480 stmt_vec_info def_stmt_info
;
1481 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1482 &def_stmt_info
, &def_stmt
);
1483 gcc_assert (is_simple_use
);
1484 if (def_stmt
&& dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1487 vec_oprnds
->create (ncopies
);
1488 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1490 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1494 vector_type
= vectype
;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1497 vector_type
= truth_type_for (stmt_vectype
);
1499 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1501 gcc_assert (vector_type
);
1502 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1504 vec_oprnds
->quick_push (vop
);
1508 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1510 for (unsigned i
= 0; i
< ncopies
; ++i
)
1511 vec_oprnds
->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1517 /* Get vectorized definitions for OP0 and OP1. */
1520 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1522 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1523 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1524 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1525 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1541 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1542 op0
, vec_oprnds0
, vectype0
);
1544 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1545 op1
, vec_oprnds1
, vectype1
);
1547 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1548 op2
, vec_oprnds2
, vectype2
);
1550 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1551 op3
, vec_oprnds3
, vectype3
);
1556 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1558 tree op0
, vec
<tree
> *vec_oprnds0
,
1559 tree op1
, vec
<tree
> *vec_oprnds1
,
1560 tree op2
, vec
<tree
> *vec_oprnds2
,
1561 tree op3
, vec
<tree
> *vec_oprnds3
)
1563 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1564 op0
, vec_oprnds0
, NULL_TREE
,
1565 op1
, vec_oprnds1
, NULL_TREE
,
1566 op2
, vec_oprnds2
, NULL_TREE
,
1567 op3
, vec_oprnds3
, NULL_TREE
);
1570 /* Helper function called by vect_finish_replace_stmt and
1571 vect_finish_stmt_generation. Set the location of the new
1572 statement and create and return a stmt_vec_info for it. */
1575 vect_finish_stmt_generation_1 (vec_info
*,
1576 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1583 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1589 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1590 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1593 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did. Create and return a
1598 stmt_vec_info for VEC_STMT. */
1601 vect_finish_replace_stmt (vec_info
*vinfo
,
1602 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1604 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1605 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1607 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1608 gsi_replace (&gsi
, vec_stmt
, true);
1610 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1617 vect_finish_stmt_generation (vec_info
*vinfo
,
1618 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1619 gimple_stmt_iterator
*gsi
)
1621 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1623 if (!gsi_end_p (*gsi
)
1624 && gimple_has_mem_ops (vec_stmt
))
1626 gimple
*at_stmt
= gsi_stmt (*gsi
);
1627 tree vuse
= gimple_vuse (at_stmt
);
1628 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1630 tree vdef
= gimple_vdef (at_stmt
);
1631 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1632 gimple_set_modified (vec_stmt
, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1638 && ((is_gimple_assign (vec_stmt
)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1640 || (is_gimple_call (vec_stmt
)
1641 && !(gimple_call_flags (vec_stmt
)
1642 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1644 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1645 gimple_set_vdef (vec_stmt
, new_vdef
);
1646 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1650 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1651 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1654 /* We want to vectorize a call to combined function CFN with function
1655 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656 as the types of all inputs. Check whether this is possible using
1657 an internal function, returning its code if so or IFN_LAST if not. */
1660 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1661 tree vectype_out
, tree vectype_in
)
1664 if (internal_fn_p (cfn
))
1665 ifn
= as_internal_fn (cfn
);
1667 ifn
= associated_internal_fn (fndecl
);
1668 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1670 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1671 if (info
.vectorizable
)
1673 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1674 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1675 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1676 OPTIMIZE_FOR_SPEED
))
1684 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1685 gimple_stmt_iterator
*);
1687 /* Check whether a load or store statement in the loop described by
1688 LOOP_VINFO is possible in a loop using partial vectors. This is
1689 testing whether the vectorizer pass has the appropriate support,
1690 as well as whether the target does.
1692 VLS_TYPE says whether the statement is a load or store and VECTYPE
1693 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1694 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1695 says how the load or store is going to be implemented and GROUP_SIZE
1696 is the number of load or store statements in the containing group.
1697 If the access is a gather load or scatter store, GS_INFO describes
1698 its arguments. If the load or store is conditional, SCALAR_MASK is the
1699 condition under which it occurs.
1701 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1702 vectors is not supported, otherwise record the required rgroup control
1706 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1708 vec_load_store_type vls_type
,
1710 vect_memory_access_type
1712 gather_scatter_info
*gs_info
,
1715 /* Invariant loads need no special support. */
1716 if (memory_access_type
== VMAT_INVARIANT
)
1719 unsigned int nvectors
;
1721 nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1723 nvectors
= vect_get_num_copies (loop_vinfo
, vectype
);
1725 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1726 machine_mode vecmode
= TYPE_MODE (vectype
);
1727 bool is_load
= (vls_type
== VLS_LOAD
);
1728 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1731 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1732 : !vect_store_lanes_supported (vectype
, group_size
, true))
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1736 "can't operate on partial vectors because"
1737 " the target doesn't have an appropriate"
1738 " load/store-lanes instruction.\n");
1739 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1742 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1747 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1749 internal_fn ifn
= (is_load
1750 ? IFN_MASK_GATHER_LOAD
1751 : IFN_MASK_SCATTER_STORE
);
1752 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1753 gs_info
->memory_type
,
1754 gs_info
->offset_vectype
,
1757 if (dump_enabled_p ())
1758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1759 "can't operate on partial vectors because"
1760 " the target doesn't have an appropriate"
1761 " gather load or scatter store instruction.\n");
1762 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1765 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1770 if (memory_access_type
!= VMAT_CONTIGUOUS
1771 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1773 /* Element X of the data must come from iteration i * VF + X of the
1774 scalar loop. We need more work to support other mappings. */
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1777 "can't operate on partial vectors because an"
1778 " access isn't contiguous.\n");
1779 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1783 if (!VECTOR_MODE_P (vecmode
))
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1787 "can't operate on partial vectors when emulating"
1788 " vector operations.\n");
1789 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1793 /* We might load more scalars than we need for permuting SLP loads.
1794 We checked in get_group_load_store_type that the extra elements
1795 don't leak into a new vector. */
1796 auto group_memory_nvectors
= [](poly_uint64 size
, poly_uint64 nunits
)
1798 unsigned int nvectors
;
1799 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1804 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1805 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1806 machine_mode mask_mode
;
1807 bool using_partial_vectors_p
= false;
1808 if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1809 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1811 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1812 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1813 using_partial_vectors_p
= true;
1817 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1819 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1820 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1821 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1822 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1823 using_partial_vectors_p
= true;
1826 if (!using_partial_vectors_p
)
1828 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1830 "can't operate on partial vectors because the"
1831 " target doesn't have the appropriate partial"
1832 " vectorization load or store.\n");
1833 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1837 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1838 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1839 that needs to be applied to all loads and stores in a vectorized loop.
1840 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1841 otherwise return VEC_MASK & LOOP_MASK.
1843 MASK_TYPE is the type of both masks. If new statements are needed,
1844 insert them before GSI. */
1847 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1848 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1850 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1854 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1856 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
1859 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1860 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1861 vec_mask
, loop_mask
);
1863 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1867 /* Determine whether we can use a gather load or scatter store to vectorize
1868 strided load or store STMT_INFO by truncating the current offset to a
1869 smaller width. We need to be able to construct an offset vector:
1871 { 0, X, X*2, X*3, ... }
1873 without loss of precision, where X is STMT_INFO's DR_STEP.
1875 Return true if this is possible, describing the gather load or scatter
1876 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1879 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1880 loop_vec_info loop_vinfo
, bool masked_p
,
1881 gather_scatter_info
*gs_info
)
1883 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1884 data_reference
*dr
= dr_info
->dr
;
1885 tree step
= DR_STEP (dr
);
1886 if (TREE_CODE (step
) != INTEGER_CST
)
1888 /* ??? Perhaps we could use range information here? */
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE
, vect_location
,
1891 "cannot truncate variable step.\n");
1895 /* Get the number of bits in an element. */
1896 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1897 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1898 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1900 /* Set COUNT to the upper limit on the number of elements - 1.
1901 Start with the maximum vectorization factor. */
1902 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1904 /* Try lowering COUNT to the number of scalar latch iterations. */
1905 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1906 widest_int max_iters
;
1907 if (max_loop_iterations (loop
, &max_iters
)
1908 && max_iters
< count
)
1909 count
= max_iters
.to_shwi ();
1911 /* Try scales of 1 and the element size. */
1912 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1913 wi::overflow_type overflow
= wi::OVF_NONE
;
1914 for (int i
= 0; i
< 2; ++i
)
1916 int scale
= scales
[i
];
1918 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1921 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1922 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1925 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1926 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1928 /* Find the narrowest viable offset type. */
1929 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1930 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1933 /* See whether the target supports the operation with an offset
1934 no narrower than OFFSET_TYPE. */
1935 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1936 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1937 vectype
, memory_type
, offset_type
, scale
,
1938 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1939 || gs_info
->ifn
== IFN_LAST
)
1942 gs_info
->decl
= NULL_TREE
;
1943 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1944 but we don't need to store that here. */
1945 gs_info
->base
= NULL_TREE
;
1946 gs_info
->element_type
= TREE_TYPE (vectype
);
1947 gs_info
->offset
= fold_convert (offset_type
, step
);
1948 gs_info
->offset_dt
= vect_constant_def
;
1949 gs_info
->scale
= scale
;
1950 gs_info
->memory_type
= memory_type
;
1954 if (overflow
&& dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE
, vect_location
,
1956 "truncating gather/scatter offset to %d bits"
1957 " might change its value.\n", element_bits
);
1962 /* Return true if we can use gather/scatter internal functions to
1963 vectorize STMT_INFO, which is a grouped or strided load or store.
1964 MASKED_P is true if load or store is conditional. When returning
1965 true, fill in GS_INFO with the information required to perform the
1969 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1970 loop_vec_info loop_vinfo
, bool masked_p
,
1971 gather_scatter_info
*gs_info
)
1973 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1974 || gs_info
->ifn
== IFN_LAST
)
1975 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1978 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1979 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1981 gcc_assert (TYPE_PRECISION (new_offset_type
)
1982 >= TYPE_PRECISION (old_offset_type
));
1983 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE
, vect_location
,
1987 "using gather/scatter for strided/grouped access,"
1988 " scale = %d\n", gs_info
->scale
);
1993 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1994 elements with a known constant step. Return -1 if that step
1995 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1998 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2000 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2001 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
2005 /* If the target supports a permute mask that reverses the elements in
2006 a vector of type VECTYPE, return that mask, otherwise return null. */
2009 perm_mask_for_reverse (tree vectype
)
2011 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2013 /* The encoding has a single stepped pattern. */
2014 vec_perm_builder
sel (nunits
, 1, 3);
2015 for (int i
= 0; i
< 3; ++i
)
2016 sel
.quick_push (nunits
- 1 - i
);
2018 vec_perm_indices
indices (sel
, 1, nunits
);
2019 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
2022 return vect_gen_perm_mask_checked (vectype
, indices
);
2025 /* A subroutine of get_load_store_type, with a subset of the same
2026 arguments. Handle the case where STMT_INFO is a load or store that
2027 accesses consecutive elements with a negative step. Sets *POFFSET
2028 to the offset to be applied to the DR for the first access. */
2030 static vect_memory_access_type
2031 get_negative_load_store_type (vec_info
*vinfo
,
2032 stmt_vec_info stmt_info
, tree vectype
,
2033 vec_load_store_type vls_type
,
2034 unsigned int ncopies
, poly_int64
*poffset
)
2036 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2037 dr_alignment_support alignment_support_scheme
;
2041 if (dump_enabled_p ())
2042 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2043 "multiple types with negative step.\n");
2044 return VMAT_ELEMENTWISE
;
2047 /* For backward running DRs the first access in vectype actually is
2048 N-1 elements before the address of the DR. */
2049 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2050 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2052 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2053 alignment_support_scheme
2054 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2055 if (alignment_support_scheme
!= dr_aligned
2056 && alignment_support_scheme
!= dr_unaligned_supported
)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2060 "negative step but alignment required.\n");
2062 return VMAT_ELEMENTWISE
;
2065 if (vls_type
== VLS_STORE_INVARIANT
)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_NOTE
, vect_location
,
2069 "negative step with invariant source;"
2070 " no permute needed.\n");
2071 return VMAT_CONTIGUOUS_DOWN
;
2074 if (!perm_mask_for_reverse (vectype
))
2076 if (dump_enabled_p ())
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2078 "negative step and reversing not supported.\n");
2080 return VMAT_ELEMENTWISE
;
2083 return VMAT_CONTIGUOUS_REVERSE
;
2086 /* STMT_INFO is either a masked or unconditional store. Return the value
2090 vect_get_store_rhs (stmt_vec_info stmt_info
)
2092 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2094 gcc_assert (gimple_assign_single_p (assign
));
2095 return gimple_assign_rhs1 (assign
);
2097 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2099 internal_fn ifn
= gimple_call_internal_fn (call
);
2100 int index
= internal_fn_stored_value_index (ifn
);
2101 gcc_assert (index
>= 0);
2102 return gimple_call_arg (call
, index
);
2107 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2109 This function returns a vector type which can be composed with NETLS pieces,
2110 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2111 same vector size as the return vector. It checks target whether supports
2112 pieces-size vector mode for construction firstly, if target fails to, check
2113 pieces-size scalar mode for construction further. It returns NULL_TREE if
2114 fails to find the available composition.
2116 For example, for (vtype=V16QI, nelts=4), we can probably get:
2117 - V16QI with PTYPE V4QI.
2118 - V4SI with PTYPE SI.
2122 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2124 gcc_assert (VECTOR_TYPE_P (vtype
));
2125 gcc_assert (known_gt (nelts
, 0U));
2127 machine_mode vmode
= TYPE_MODE (vtype
);
2128 if (!VECTOR_MODE_P (vmode
))
2131 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2132 unsigned int pbsize
;
2133 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2135 /* First check if vec_init optab supports construction from
2136 vector pieces directly. */
2137 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2138 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2140 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2141 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2142 != CODE_FOR_nothing
))
2144 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2148 /* Otherwise check if exists an integer type of the same piece size and
2149 if vec_init optab supports construction from it directly. */
2150 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2151 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2152 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2153 != CODE_FOR_nothing
))
2155 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2156 return build_vector_type (*ptype
, nelts
);
2163 /* A subroutine of get_load_store_type, with a subset of the same
2164 arguments. Handle the case where STMT_INFO is part of a grouped load
2167 For stores, the statements in the group are all consecutive
2168 and there is no gap at the end. For loads, the statements in the
2169 group might not be consecutive; there can be gaps between statements
2170 as well as at the end. */
2173 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2174 tree vectype
, slp_tree slp_node
,
2175 bool masked_p
, vec_load_store_type vls_type
,
2176 vect_memory_access_type
*memory_access_type
,
2177 poly_int64
*poffset
,
2178 dr_alignment_support
*alignment_support_scheme
,
2180 gather_scatter_info
*gs_info
)
2182 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2183 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2184 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2185 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2186 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2187 bool single_element_p
= (stmt_info
== first_stmt_info
2188 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2189 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2190 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2192 /* True if the vectorized statements would access beyond the last
2193 statement in the group. */
2194 bool overrun_p
= false;
2196 /* True if we can cope with such overrun by peeling for gaps, so that
2197 there is at least one final scalar iteration after the vector loop. */
2198 bool can_overrun_p
= (!masked_p
2199 && vls_type
== VLS_LOAD
2203 /* There can only be a gap at the end of the group if the stride is
2204 known at compile time. */
2205 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2207 /* Stores can't yet have gaps. */
2208 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2212 /* For SLP vectorization we directly vectorize a subchain
2213 without permutation. */
2214 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2216 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2217 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2219 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2220 separated by the stride, until we have a complete vector.
2221 Fall back to scalar accesses if that isn't possible. */
2222 if (multiple_p (nunits
, group_size
))
2223 *memory_access_type
= VMAT_STRIDED_SLP
;
2225 *memory_access_type
= VMAT_ELEMENTWISE
;
2229 overrun_p
= loop_vinfo
&& gap
!= 0;
2230 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2233 "Grouped store with gaps requires"
2234 " non-consecutive accesses\n");
2237 /* An overrun is fine if the trailing elements are smaller
2238 than the alignment boundary B. Every vector access will
2239 be a multiple of B and so we are guaranteed to access a
2240 non-gap element in the same B-sized block. */
2242 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2244 / vect_get_scalar_dr_size (first_dr_info
)))
2247 /* If the gap splits the vector in half and the target
2248 can do half-vector operations avoid the epilogue peeling
2249 by simply loading half of the vector only. Usually
2250 the construction with an upper zero half will be elided. */
2251 dr_alignment_support alss
;
2252 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2256 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2257 vectype
, misalign
)))
2259 || alss
== dr_unaligned_supported
)
2260 && known_eq (nunits
, (group_size
- gap
) * 2)
2261 && known_eq (nunits
, group_size
)
2262 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2266 if (overrun_p
&& !can_overrun_p
)
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2270 "Peeling for outer loop is not supported\n");
2273 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2276 if (single_element_p
)
2277 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2278 only correct for single element "interleaving" SLP. */
2279 *memory_access_type
= get_negative_load_store_type
2280 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2283 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2284 separated by the stride, until we have a complete vector.
2285 Fall back to scalar accesses if that isn't possible. */
2286 if (multiple_p (nunits
, group_size
))
2287 *memory_access_type
= VMAT_STRIDED_SLP
;
2289 *memory_access_type
= VMAT_ELEMENTWISE
;
2294 gcc_assert (!loop_vinfo
|| cmp
> 0);
2295 *memory_access_type
= VMAT_CONTIGUOUS
;
2298 /* When we have a contiguous access across loop iterations
2299 but the access in the loop doesn't cover the full vector
2300 we can end up with no gap recorded but still excess
2301 elements accessed, see PR103116. Make sure we peel for
2302 gaps if necessary and sufficient and give up if not. */
2304 && *memory_access_type
== VMAT_CONTIGUOUS
2305 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2306 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2309 unsigned HOST_WIDE_INT cnunits
, cvf
;
2311 || !nunits
.is_constant (&cnunits
)
2312 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2313 /* Peeling for gaps assumes that a single scalar iteration
2314 is enough to make sure the last vector iteration doesn't
2315 access excess elements.
2316 ??? Enhancements include peeling multiple iterations
2317 or using masked loads with a static mask. */
2318 || (group_size
* cvf
) % cnunits
+ group_size
< cnunits
)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2322 "peeling for gaps insufficient for "
2332 /* We can always handle this case using elementwise accesses,
2333 but see if something more efficient is available. */
2334 *memory_access_type
= VMAT_ELEMENTWISE
;
2336 /* If there is a gap at the end of the group then these optimizations
2337 would access excess elements in the last iteration. */
2338 bool would_overrun_p
= (gap
!= 0);
2339 /* An overrun is fine if the trailing elements are smaller than the
2340 alignment boundary B. Every vector access will be a multiple of B
2341 and so we are guaranteed to access a non-gap element in the
2342 same B-sized block. */
2345 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2346 / vect_get_scalar_dr_size (first_dr_info
)))
2347 would_overrun_p
= false;
2349 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2350 && (can_overrun_p
|| !would_overrun_p
)
2351 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2353 /* First cope with the degenerate case of a single-element
2355 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2358 /* Otherwise try using LOAD/STORE_LANES. */
2359 else if (vls_type
== VLS_LOAD
2360 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2361 : vect_store_lanes_supported (vectype
, group_size
,
2364 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2365 overrun_p
= would_overrun_p
;
2368 /* If that fails, try using permuting loads. */
2369 else if (vls_type
== VLS_LOAD
2370 ? vect_grouped_load_supported (vectype
, single_element_p
,
2372 : vect_grouped_store_supported (vectype
, group_size
))
2374 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2375 overrun_p
= would_overrun_p
;
2379 /* As a last resort, trying using a gather load or scatter store.
2381 ??? Although the code can handle all group sizes correctly,
2382 it probably isn't a win to use separate strided accesses based
2383 on nearby locations. Or, even if it's a win over scalar code,
2384 it might not be a win over vectorizing at a lower VF, if that
2385 allows us to use contiguous accesses. */
2386 if (*memory_access_type
== VMAT_ELEMENTWISE
2389 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2391 *memory_access_type
= VMAT_GATHER_SCATTER
;
2394 if (*memory_access_type
== VMAT_GATHER_SCATTER
2395 || *memory_access_type
== VMAT_ELEMENTWISE
)
2397 *alignment_support_scheme
= dr_unaligned_supported
;
2398 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2402 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2403 *alignment_support_scheme
2404 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2408 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2410 /* STMT is the leader of the group. Check the operands of all the
2411 stmts of the group. */
2412 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2413 while (next_stmt_info
)
2415 tree op
= vect_get_store_rhs (next_stmt_info
);
2416 enum vect_def_type dt
;
2417 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2421 "use not simple.\n");
2424 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2430 gcc_assert (can_overrun_p
);
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "Data access with gaps requires scalar "
2435 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2441 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2442 if there is a memory access type that the vectorized form can use,
2443 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2444 or scatters, fill in GS_INFO accordingly. In addition
2445 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2446 the target does not support the alignment scheme. *MISALIGNMENT
2447 is set according to the alignment of the access (including
2448 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2450 SLP says whether we're performing SLP rather than loop vectorization.
2451 MASKED_P is true if the statement is conditional on a vectorized mask.
2452 VECTYPE is the vector type that the vectorized statements will use.
2453 NCOPIES is the number of vector statements that will be needed. */
2456 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2457 tree vectype
, slp_tree slp_node
,
2458 bool masked_p
, vec_load_store_type vls_type
,
2459 unsigned int ncopies
,
2460 vect_memory_access_type
*memory_access_type
,
2461 poly_int64
*poffset
,
2462 dr_alignment_support
*alignment_support_scheme
,
2464 gather_scatter_info
*gs_info
)
2466 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2467 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2468 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2470 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2472 *memory_access_type
= VMAT_GATHER_SCATTER
;
2473 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2475 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2476 &gs_info
->offset_dt
,
2477 &gs_info
->offset_vectype
))
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2481 "%s index use not simple.\n",
2482 vls_type
== VLS_LOAD
? "gather" : "scatter");
2485 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2487 if (vls_type
!= VLS_LOAD
)
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2491 "unsupported emulated scatter.\n");
2494 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2495 || !TYPE_VECTOR_SUBPARTS
2496 (gs_info
->offset_vectype
).is_constant ()
2497 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2498 (gs_info
->offset_vectype
),
2499 TYPE_VECTOR_SUBPARTS (vectype
)))
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2503 "unsupported vector types for emulated "
2508 /* Gather-scatter accesses perform only component accesses, alignment
2509 is irrelevant for them. */
2510 *alignment_support_scheme
= dr_unaligned_supported
;
2512 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2514 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2516 vls_type
, memory_access_type
, poffset
,
2517 alignment_support_scheme
,
2518 misalignment
, gs_info
))
2521 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2523 gcc_assert (!slp_node
);
2525 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2527 *memory_access_type
= VMAT_GATHER_SCATTER
;
2529 *memory_access_type
= VMAT_ELEMENTWISE
;
2530 /* Alignment is irrelevant here. */
2531 *alignment_support_scheme
= dr_unaligned_supported
;
2535 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2538 gcc_assert (vls_type
== VLS_LOAD
);
2539 *memory_access_type
= VMAT_INVARIANT
;
2540 /* Invariant accesses perform only component accesses, alignment
2541 is irrelevant for them. */
2542 *alignment_support_scheme
= dr_unaligned_supported
;
2547 *memory_access_type
= get_negative_load_store_type
2548 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2550 *memory_access_type
= VMAT_CONTIGUOUS
;
2551 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2553 *alignment_support_scheme
2554 = vect_supportable_dr_alignment (vinfo
,
2555 STMT_VINFO_DR_INFO (stmt_info
),
2556 vectype
, *misalignment
);
2560 if ((*memory_access_type
== VMAT_ELEMENTWISE
2561 || *memory_access_type
== VMAT_STRIDED_SLP
)
2562 && !nunits
.is_constant ())
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2566 "Not using elementwise accesses due to variable "
2567 "vectorization factor.\n");
2571 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2573 if (dump_enabled_p ())
2574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2575 "unsupported unaligned access\n");
2579 /* FIXME: At the moment the cost model seems to underestimate the
2580 cost of using elementwise accesses. This check preserves the
2581 traditional behavior until that can be fixed. */
2582 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2583 if (!first_stmt_info
)
2584 first_stmt_info
= stmt_info
;
2585 if (*memory_access_type
== VMAT_ELEMENTWISE
2586 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2587 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2588 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2589 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2593 "not falling back to elementwise accesses\n");
2599 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2600 conditional operation STMT_INFO. When returning true, store the mask
2601 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2602 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2603 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2606 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2607 slp_tree slp_node
, unsigned mask_index
,
2608 tree
*mask
, slp_tree
*mask_node
,
2609 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2611 enum vect_def_type mask_dt
;
2613 slp_tree mask_node_1
;
2614 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2615 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2617 if (dump_enabled_p ())
2618 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2619 "mask use not simple.\n");
2623 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2627 "mask argument is not a boolean.\n");
2631 /* If the caller is not prepared for adjusting an external/constant
2632 SLP mask vector type fail. */
2635 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2637 if (dump_enabled_p ())
2638 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2639 "SLP mask argument is not vectorized.\n");
2643 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2645 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2647 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2649 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2651 "could not find an appropriate vector mask type.\n");
2655 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2656 TYPE_VECTOR_SUBPARTS (vectype
)))
2658 if (dump_enabled_p ())
2659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2660 "vector mask type %T"
2661 " does not match vector data type %T.\n",
2662 mask_vectype
, vectype
);
2667 *mask_dt_out
= mask_dt
;
2668 *mask_vectype_out
= mask_vectype
;
2670 *mask_node
= mask_node_1
;
2674 /* Return true if stored value RHS is suitable for vectorizing store
2675 statement STMT_INFO. When returning true, store the type of the
2676 definition in *RHS_DT_OUT, the type of the vectorized store value in
2677 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2680 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2681 slp_tree slp_node
, tree rhs
,
2682 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2683 vec_load_store_type
*vls_type_out
)
2685 /* In the case this is a store from a constant make sure
2686 native_encode_expr can handle it. */
2687 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2689 if (dump_enabled_p ())
2690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2691 "cannot encode constant as a byte sequence.\n");
2696 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2698 if (gimple_call_internal_p (call
)
2699 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2700 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2703 enum vect_def_type rhs_dt
;
2706 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2707 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2711 "use not simple.\n");
2715 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2716 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2718 if (dump_enabled_p ())
2719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2720 "incompatible vector types.\n");
2724 *rhs_dt_out
= rhs_dt
;
2725 *rhs_vectype_out
= rhs_vectype
;
2726 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2727 *vls_type_out
= VLS_STORE_INVARIANT
;
2729 *vls_type_out
= VLS_STORE
;
2733 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2734 Note that we support masks with floating-point type, in which case the
2735 floats are interpreted as a bitmask. */
2738 vect_build_all_ones_mask (vec_info
*vinfo
,
2739 stmt_vec_info stmt_info
, tree masktype
)
2741 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2742 return build_int_cst (masktype
, -1);
2743 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2745 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2746 mask
= build_vector_from_val (masktype
, mask
);
2747 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2749 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2753 for (int j
= 0; j
< 6; ++j
)
2755 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2756 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2757 mask
= build_vector_from_val (masktype
, mask
);
2758 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2763 /* Build an all-zero merge value of type VECTYPE while vectorizing
2764 STMT_INFO as a gather load. */
2767 vect_build_zero_merge_argument (vec_info
*vinfo
,
2768 stmt_vec_info stmt_info
, tree vectype
)
2771 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2772 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2773 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2777 for (int j
= 0; j
< 6; ++j
)
2779 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2780 merge
= build_real (TREE_TYPE (vectype
), r
);
2784 merge
= build_vector_from_val (vectype
, merge
);
2785 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2788 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2789 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2790 the gather load operation. If the load is conditional, MASK is the
2791 unvectorized condition and MASK_DT is its definition type, otherwise
2795 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2796 gimple_stmt_iterator
*gsi
,
2798 gather_scatter_info
*gs_info
,
2801 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2802 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2803 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2804 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2805 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2806 edge pe
= loop_preheader_edge (loop
);
2807 enum { NARROW
, NONE
, WIDEN
} modifier
;
2808 poly_uint64 gather_off_nunits
2809 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2811 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2812 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2813 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2814 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2815 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2816 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2817 tree scaletype
= TREE_VALUE (arglist
);
2818 tree real_masktype
= masktype
;
2819 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2821 || TREE_CODE (masktype
) == INTEGER_TYPE
2822 || types_compatible_p (srctype
, masktype
)));
2824 masktype
= truth_type_for (srctype
);
2826 tree mask_halftype
= masktype
;
2827 tree perm_mask
= NULL_TREE
;
2828 tree mask_perm_mask
= NULL_TREE
;
2829 if (known_eq (nunits
, gather_off_nunits
))
2831 else if (known_eq (nunits
* 2, gather_off_nunits
))
2835 /* Currently widening gathers and scatters are only supported for
2836 fixed-length vectors. */
2837 int count
= gather_off_nunits
.to_constant ();
2838 vec_perm_builder
sel (count
, count
, 1);
2839 for (int i
= 0; i
< count
; ++i
)
2840 sel
.quick_push (i
| (count
/ 2));
2842 vec_perm_indices
indices (sel
, 1, count
);
2843 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2846 else if (known_eq (nunits
, gather_off_nunits
* 2))
2850 /* Currently narrowing gathers and scatters are only supported for
2851 fixed-length vectors. */
2852 int count
= nunits
.to_constant ();
2853 vec_perm_builder
sel (count
, count
, 1);
2854 sel
.quick_grow (count
);
2855 for (int i
= 0; i
< count
; ++i
)
2856 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2857 vec_perm_indices
indices (sel
, 2, count
);
2858 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2862 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2864 for (int i
= 0; i
< count
; ++i
)
2865 sel
[i
] = i
| (count
/ 2);
2866 indices
.new_vector (sel
, 2, count
);
2867 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2870 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2875 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2876 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2878 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2879 if (!is_gimple_min_invariant (ptr
))
2882 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2883 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2884 gcc_assert (!new_bb
);
2887 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2889 tree vec_oprnd0
= NULL_TREE
;
2890 tree vec_mask
= NULL_TREE
;
2891 tree src_op
= NULL_TREE
;
2892 tree mask_op
= NULL_TREE
;
2893 tree prev_res
= NULL_TREE
;
2897 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2898 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2901 auto_vec
<tree
> vec_oprnds0
;
2902 auto_vec
<tree
> vec_masks
;
2903 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2904 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2905 gs_info
->offset
, &vec_oprnds0
);
2907 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2908 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2909 mask
, &vec_masks
, masktype
);
2910 for (int j
= 0; j
< ncopies
; ++j
)
2913 if (modifier
== WIDEN
&& (j
& 1))
2914 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2915 perm_mask
, stmt_info
, gsi
);
2917 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2919 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2921 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2922 TYPE_VECTOR_SUBPARTS (idxtype
)));
2923 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2924 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2925 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2926 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2932 if (mask_perm_mask
&& (j
& 1))
2933 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2934 mask_perm_mask
, stmt_info
, gsi
);
2937 if (modifier
== NARROW
)
2940 vec_mask
= vec_masks
[j
/ 2];
2943 vec_mask
= vec_masks
[j
];
2946 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2948 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2949 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2950 gcc_assert (known_eq (sub1
, sub2
));
2951 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2952 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2954 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2955 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2959 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2961 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2963 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2964 : VEC_UNPACK_LO_EXPR
,
2966 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2972 tree mask_arg
= mask_op
;
2973 if (masktype
!= real_masktype
)
2975 tree utype
, optype
= TREE_TYPE (mask_op
);
2976 if (VECTOR_TYPE_P (real_masktype
)
2977 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2978 utype
= real_masktype
;
2980 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2981 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2982 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2984 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2985 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2987 if (!useless_type_conversion_p (real_masktype
, utype
))
2989 gcc_assert (TYPE_PRECISION (utype
)
2990 <= TYPE_PRECISION (real_masktype
));
2991 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2992 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2993 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2996 src_op
= build_zero_cst (srctype
);
2998 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
3001 if (!useless_type_conversion_p (vectype
, rettype
))
3003 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
3004 TYPE_VECTOR_SUBPARTS (rettype
)));
3005 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
3006 gimple_call_set_lhs (new_stmt
, op
);
3007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3008 var
= make_ssa_name (vec_dest
);
3009 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
3010 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
3011 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3015 var
= make_ssa_name (vec_dest
, new_stmt
);
3016 gimple_call_set_lhs (new_stmt
, var
);
3017 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3020 if (modifier
== NARROW
)
3027 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3029 new_stmt
= SSA_NAME_DEF_STMT (var
);
3032 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3034 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3037 /* Prepare the base and offset in GS_INFO for vectorization.
3038 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3039 to the vectorized offset argument for the first copy of STMT_INFO.
3040 STMT_INFO is the statement described by GS_INFO and LOOP is the
3044 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
3045 class loop
*loop
, stmt_vec_info stmt_info
,
3046 slp_tree slp_node
, gather_scatter_info
*gs_info
,
3047 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
3049 gimple_seq stmts
= NULL
;
3050 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3054 edge pe
= loop_preheader_edge (loop
);
3055 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3056 gcc_assert (!new_bb
);
3059 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
3063 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
3064 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
3065 gs_info
->offset
, vec_offset
,
3066 gs_info
->offset_vectype
);
3070 /* Prepare to implement a grouped or strided load or store using
3071 the gather load or scatter store operation described by GS_INFO.
3072 STMT_INFO is the load or store statement.
3074 Set *DATAREF_BUMP to the amount that should be added to the base
3075 address after each copy of the vectorized statement. Set *VEC_OFFSET
3076 to an invariant offset vector in which element I has the value
3077 I * DR_STEP / SCALE. */
/* NOTE(review): extraction dropped lines here (numbering jumps
   3077 -> 3080); the function's return type and opening brace are
   missing from this view.  */
3080 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3081 loop_vec_info loop_vinfo
,
3082 gather_scatter_info
*gs_info
,
3083 tree
*dataref_bump
, tree
*vec_offset
)
3085 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3086 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* The per-copy bump is DR_STEP scaled by the number of vector lanes;
   hoist its computation to the loop preheader (loop-invariant).  */
3088 tree bump
= size_binop (MULT_EXPR
,
3089 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3090 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3091 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3093 /* The offset given in GS_INFO can have pointer type, so use the element
3094 type of the vector instead. */
3095 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3097 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3098 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3099 ssize_int (gs_info
->scale
));
3100 step
= fold_convert (offset_type
, step
);
3102 /* Create {0, X, X*2, X*3, ...}. */
3103 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3104 build_zero_cst (offset_type
), step
);
3105 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
3108 /* Return the amount that should be added to a vector pointer to move
3109 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3110 being vectorized and MEMORY_ACCESS_TYPE describes the type of
/* NOTE(review): extraction dropped lines here (numbering jumps
   3110 -> 3114); the end of the comment above, the return type and the
   opening brace are missing from this view.  */
3114 vect_get_data_ptr_increment (vec_info
*vinfo
,
3115 dr_vec_info
*dr_info
, tree aggr_type
,
3116 vect_memory_access_type memory_access_type
)
/* Invariant accesses never advance the data pointer.  */
3118 if (memory_access_type
== VMAT_INVARIANT
)
3119 return size_zero_node
;
/* Default increment: the size of one AGGR_TYPE.  */
3121 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3122 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
/* For a negative data-reference step the pointer moves backwards,
   so negate the increment.  (The final return statement was dropped
   by the extraction.)  */
3123 if (tree_int_cst_sgn (step
) == -1)
3124 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3128 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
/* NOTE(review): this definition has interior lines dropped by the
   extraction (the embedded numbering is not contiguous, e.g.
   3128 -> 3131, 3147 -> 3152, 3173 -> 3179); missing pieces include the
   return type, braces and several early-return branches.  Verify
   against the upstream file.  */
3131 vectorizable_bswap (vec_info
*vinfo
,
3132 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3133 gimple
**vec_stmt
, slp_tree slp_node
,
3135 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3138 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3139 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3142 op
= gimple_call_arg (stmt
, 0);
3143 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3144 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3146 /* Multiple types in SLP are handled by creating the appropriate number of
3147 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3152 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3154 gcc_assert (ncopies
>= 1);
/* The bswap is implemented as a byte permutation on a same-sized
   vector of chars.  */
3156 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3160 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3161 unsigned word_bytes
;
3162 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3165 /* The encoding uses one stepped pattern for each byte in the word. */
3166 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3167 for (unsigned i
= 0; i
< 3; ++i
)
3168 for (unsigned j
= 0; j
< word_bytes
; ++j
)
/* Reverse the bytes within each word: byte J of word I maps to
   (i + 1) * word_bytes - j - 1.  */
3169 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3171 vec_perm_indices
indices (elts
, 1, num_bytes
);
3172 machine_mode vmode
= TYPE_MODE (char_vectype
);
/* The target must support this constant permutation.  */
3173 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3179 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3181 if (dump_enabled_p ())
3182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3183 "incompatible vector types for invariants\n");
/* Analysis-only path: record the stmt type and costs, no code is
   generated here.  */
3187 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3188 DUMP_VECT_SCOPE ("vectorizable_bswap");
3189 record_stmt_cost (cost_vec
,
3190 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3191 record_stmt_cost (cost_vec
,
3193 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3194 vec_perm
, stmt_info
, 0, vect_body
);
3198 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3201 vec
<tree
> vec_oprnds
= vNULL
;
3202 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3204 /* Arguments are ready. create the new vector stmt. */
3207 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Per vector operand: view-convert to the char vector, permute the
   bytes, then view-convert back to the original vector type.  */
3210 tree tem
= make_ssa_name (char_vectype
);
3211 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3212 char_vectype
, vop
));
3213 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3214 tree tem2
= make_ssa_name (char_vectype
);
3215 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3216 tem
, tem
, bswap_vconst
);
3217 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3218 tem
= make_ssa_name (vectype
);
3219 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3221 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3223 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3225 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3229 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3231 vec_oprnds
.release ();
3235 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3236 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3237 in a single step. On success, store the binary pack code in
/* NOTE(review): extraction dropped lines here (numbering jumps
   3237 -> 3241); the end of the comment, the return type and opening
   brace, and the early "return false"/"return true" statements are
   missing from this view.  */
3241 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3242 tree_code
*convert_code
)
/* Both element types must be integral.  */
3244 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3245 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3249 int multi_step_cvt
= 0;
3250 auto_vec
<tree
, 8> interm_types
;
/* Ask whether a NOP narrowing is supported; a nonzero MULTI_STEP_CVT
   would mean more than one step is needed.  */
3251 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3252 &code
, &multi_step_cvt
, &interm_types
)
3256 *convert_code
= code
;
3260 /* Function vectorizable_call.
3262 Check if STMT_INFO performs a function call that can be vectorized.
3263 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3264 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3265 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this definition has many interior lines dropped by the
   extraction (the embedded original numbering is non-contiguous
   throughout, e.g. 3265 -> 3268, 3271 -> 3277, 3552 -> 3556); missing
   pieces include the return type, braces, several else-branches and
   early returns.  The comments added below describe only what the
   surviving lines show -- verify anything load-bearing against the
   upstream file.  */
3268 vectorizable_call (vec_info
*vinfo
,
3269 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3270 gimple
**vec_stmt
, slp_tree slp_node
,
3271 stmt_vector_for_cost
*cost_vec
)
3277 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3278 tree vectype_out
, vectype_in
;
3279 poly_uint64 nunits_in
;
3280 poly_uint64 nunits_out
;
3281 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3282 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3283 tree fndecl
, new_temp
, rhs_type
;
/* Per-argument def types / vector types / SLP operands; at most four
   call arguments are handled (see the nargs check below).  */
3284 enum vect_def_type dt
[4]
3285 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3286 vect_unknown_def_type
};
3287 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3288 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3289 int ndts
= ARRAY_SIZE (dt
);
3291 auto_vec
<tree
, 8> vargs
;
/* MODIFIER describes the lane-count relation between input and output
   vectors, decided from nunits_in/nunits_out below.  */
3292 enum { NARROW
, NONE
, WIDEN
} modifier
;
3296 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3299 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3303 /* Is STMT_INFO a vectorizable call? */
3304 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3308 if (gimple_call_internal_p (stmt
)
3309 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3310 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3311 /* Handled by vectorizable_load and vectorizable_store. */
3314 if (gimple_call_lhs (stmt
) == NULL_TREE
3315 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3318 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3320 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3322 /* Process function arguments. */
3323 rhs_type
= NULL_TREE
;
3324 vectype_in
= NULL_TREE
;
3325 nargs
= gimple_call_num_args (stmt
);
3327 /* Bail out if the function has more than four arguments, we do not have
3328 interesting builtin functions to vectorize with more than two arguments
3329 except for fma. No arguments is also not good. */
3330 if (nargs
== 0 || nargs
> 4)
3333 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3334 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3335 if (cfn
== CFN_GOMP_SIMD_LANE
)
3338 rhs_type
= unsigned_type_node
;
/* For internal functions, note which operand (if any) is a mask.  */
3342 if (internal_fn_p (cfn
))
3343 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3345 for (i
= 0; i
< nargs
; i
++)
/* The mask operand gets dedicated checking; other operands go
   through the generic simple-use check.  */
3347 if ((int) i
== mask_opno
)
3349 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3350 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3355 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3356 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3360 "use not simple.\n");
3364 /* We can only handle calls with arguments of the same type. */
3366 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3368 if (dump_enabled_p ())
3369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3370 "argument types differ.\n");
3374 rhs_type
= TREE_TYPE (op
);
3377 vectype_in
= vectypes
[i
];
3378 else if (vectypes
[i
]
3379 && !types_compatible_p (vectypes
[i
], vectype_in
))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3383 "argument vector types differ.\n");
3387 /* If all arguments are external or constant defs, infer the vector type
3388 from the scalar type. */
3390 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3392 gcc_assert (vectype_in
);
3395 if (dump_enabled_p ())
3396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3397 "no vectype for scalar type %T\n", rhs_type
);
3401 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3402 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3403 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3404 by a pack of the two vectors into an SI vector. We would need
3405 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3406 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3410 "mismatched vector sizes %T and %T\n",
3411 vectype_in
, vectype_out
);
3415 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3416 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3418 if (dump_enabled_p ())
3419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3420 "mixed mask and nonmask vector types\n");
/* Classify the call as WIDEN / NONE / NARROW from the lane-count
   ratio of input to output vectors.  */
3425 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3426 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3427 if (known_eq (nunits_in
* 2, nunits_out
))
3429 else if (known_eq (nunits_out
, nunits_in
))
3431 else if (known_eq (nunits_out
* 2, nunits_in
))
3436 /* We only handle functions that do not read or clobber memory. */
3437 if (gimple_vuse (stmt
))
3439 if (dump_enabled_p ())
3440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3441 "function reads from or writes to memory.\n");
3445 /* For now, we only vectorize functions if a target specific builtin
3446 is available. TODO -- in some cases, it might be profitable to
3447 insert the calls for pieces of the vector, in order to be able
3448 to vectorize other operations in the loop. */
3450 internal_fn ifn
= IFN_LAST
;
3451 tree callee
= gimple_call_fndecl (stmt
);
3453 /* First try using an internal function. */
3454 tree_code convert_code
= ERROR_MARK
;
3456 && (modifier
== NONE
3457 || (modifier
== NARROW
3458 && simple_integer_narrowing (vectype_out
, vectype_in
,
3460 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3463 /* If that fails, try asking for a target-specific built-in function. */
3464 if (ifn
== IFN_LAST
)
3466 if (cfn
!= CFN_LAST
)
3467 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3468 (cfn
, vectype_out
, vectype_in
);
3469 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3470 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3471 (callee
, vectype_out
, vectype_in
);
/* Neither an internal function nor a target builtin: only the
   GOMP_SIMD_LANE and bswap special cases remain viable.  */
3474 if (ifn
== IFN_LAST
&& !fndecl
)
3476 if (cfn
== CFN_GOMP_SIMD_LANE
3479 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3480 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3481 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3482 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3484 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3485 { 0, 1, 2, ... vf - 1 } vector. */
3486 gcc_assert (nargs
== 0);
3488 else if (modifier
== NONE
3489 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3490 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3491 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3492 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3493 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3494 slp_op
, vectype_in
, cost_vec
);
3497 if (dump_enabled_p ())
3498 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3499 "function is not vectorizable.\n");
/* A NARROW call with no internal function consumes two input vectors
   per output vector, hence NCOPIES is based on vectype_out.  */
3506 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3507 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3509 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3511 /* Sanity check: make sure that at least one copy of the vectorized stmt
3512 needs to be generated. */
3513 gcc_assert (ncopies
>= 1);
3515 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3516 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3517 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
/* Analysis phase: validate operand vector types, record costs and
   loop-mask requirements, then return without generating code.  */
3518 if (!vec_stmt
) /* transformation not required. */
3521 for (i
= 0; i
< nargs
; ++i
)
3522 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3524 ? vectypes
[i
] : vectype_in
))
3526 if (dump_enabled_p ())
3527 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3528 "incompatible vector types for invariants\n");
3531 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3532 DUMP_VECT_SCOPE ("vectorizable_call");
3533 vect_model_simple_cost (vinfo
, stmt_info
,
3534 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3535 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3536 record_stmt_cost (cost_vec
, ncopies
/ 2,
3537 vec_promote_demote
, stmt_info
, 0, vect_body
);
3540 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3541 && (reduc_idx
>= 0 || mask_opno
>= 0))
3544 && (cond_fn
== IFN_LAST
3545 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3546 OPTIMIZE_FOR_SPEED
)))
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3550 "can't use a fully-masked loop because no"
3551 " conditional operation is available.\n");
3552 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3556 unsigned int nvectors
3558 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3560 tree scalar_mask
= NULL_TREE
;
3562 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
)
;
3563 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3564 vectype_out
, scalar_mask
);
/* Transformation phase starts here.  */
3572 if (dump_enabled_p ())
3573 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3576 scalar_dest
= gimple_call_lhs (stmt
);
3577 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3579 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3580 unsigned int vect_nargs
= nargs
;
3581 if (masked_loop_p
&& reduc_idx
>= 0)
/* Case 1: NONE modifier, or an internal function (possibly NARROW).  */
3587 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3589 tree prev_res
= NULL_TREE
;
3590 vargs
.safe_grow (vect_nargs
, true);
3591 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3592 for (j
= 0; j
< ncopies
; ++j
)
3594 /* Build argument list for the vectorized call. */
3597 vec
<tree
> vec_oprnds0
;
3599 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3600 vec_oprnds0
= vec_defs
[0];
3602 /* Arguments are ready. Create the new vector stmt. */
3603 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3606 if (masked_loop_p
&& reduc_idx
>= 0)
3608 unsigned int vec_num
= vec_oprnds0
.length ();
3609 /* Always true for SLP. */
3610 gcc_assert (ncopies
== 1);
3611 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3615 for (k
= 0; k
< nargs
; k
++)
3617 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3618 vargs
[varg
++] = vec_oprndsk
[i
];
3620 if (masked_loop_p
&& reduc_idx
>= 0)
3621 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3623 if (modifier
== NARROW
)
3625 /* We don't define any narrowing conditional functions
3627 gcc_assert (mask_opno
< 0);
/* NARROW: emit a half-width internal call, then combine pairs of
   half results with CONVERT_CODE on every other iteration.  */
3628 tree half_res
= make_ssa_name (vectype_in
);
3630 = gimple_build_call_internal_vec (ifn
, vargs
);
3631 gimple_call_set_lhs (call
, half_res
);
3632 gimple_call_set_nothrow (call
, true);
3633 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3636 prev_res
= half_res
;
3639 new_temp
= make_ssa_name (vec_dest
);
3640 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3641 prev_res
, half_res
);
3642 vect_finish_stmt_generation (vinfo
, stmt_info
,
3647 if (mask_opno
>= 0 && masked_loop_p
)
3649 unsigned int vec_num
= vec_oprnds0
.length ();
3650 /* Always true for SLP. */
3651 gcc_assert (ncopies
== 1);
3652 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
/* AND the statement's own mask with the loop mask.  */
3654 vargs
[mask_opno
] = prepare_vec_mask
3655 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3656 vargs
[mask_opno
], gsi
);
3660 if (ifn
!= IFN_LAST
)
3661 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3663 call
= gimple_build_call_vec (fndecl
, vargs
);
3664 new_temp
= make_ssa_name (vec_dest
, call
);
3665 gimple_call_set_lhs (call
, new_temp
);
3666 gimple_call_set_nothrow (call
, true);
3667 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3670 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Non-SLP path of case 1.  */
3676 if (masked_loop_p
&& reduc_idx
>= 0)
3677 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3679 for (i
= 0; i
< nargs
; i
++)
3681 op
= gimple_call_arg (stmt
, i
);
3684 vec_defs
.quick_push (vNULL
);
3685 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3689 vargs
[varg
++] = vec_defs
[i
][j
];
3691 if (masked_loop_p
&& reduc_idx
>= 0)
3692 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3694 if (mask_opno
>= 0 && masked_loop_p
)
3696 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3699 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3700 vargs
[mask_opno
], gsi
);
/* GOMP_SIMD_LANE is materialized as the constant vector
   { j*nunits_out, j*nunits_out+1, ... }.  */
3704 if (cfn
== CFN_GOMP_SIMD_LANE
)
3706 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3708 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3709 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3710 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3711 new_temp
= make_ssa_name (vec_dest
);
3712 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3713 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3715 else if (modifier
== NARROW
)
3717 /* We don't define any narrowing conditional functions at
3719 gcc_assert (mask_opno
< 0);
3720 tree half_res
= make_ssa_name (vectype_in
);
3721 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3722 gimple_call_set_lhs (call
, half_res
);
3723 gimple_call_set_nothrow (call
, true);
3724 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3727 prev_res
= half_res
;
3730 new_temp
= make_ssa_name (vec_dest
);
3731 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3732 prev_res
, half_res
);
3733 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3738 if (ifn
!= IFN_LAST
)
3739 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3741 call
= gimple_build_call_vec (fndecl
, vargs
);
3742 new_temp
= make_ssa_name (vec_dest
, call
);
3743 gimple_call_set_lhs (call
, new_temp
);
3744 gimple_call_set_nothrow (call
, true);
3745 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
/* For NARROW only odd iterations produce a full-width result.  */
3749 if (j
== (modifier
== NARROW
? 1 : 0))
3750 *vec_stmt
= new_stmt
;
3751 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3753 for (i
= 0; i
< nargs
; i
++)
3755 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3756 vec_oprndsi
.release ();
/* Case 2: NARROW with a target builtin -- each vectorized call takes
   two input vectors per operand.  */
3759 else if (modifier
== NARROW
)
3761 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3762 /* We don't define any narrowing conditional functions at present. */
3763 gcc_assert (mask_opno
< 0);
3764 for (j
= 0; j
< ncopies
; ++j
)
3766 /* Build argument list for the vectorized call. */
3768 vargs
.create (nargs
* 2);
3774 vec
<tree
> vec_oprnds0
;
3776 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3777 vec_oprnds0
= vec_defs
[0];
3779 /* Arguments are ready. Create the new vector stmt. */
3780 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3784 for (k
= 0; k
< nargs
; k
++)
3786 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3787 vargs
.quick_push (vec_oprndsk
[i
]);
3788 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3791 if (ifn
!= IFN_LAST
)
3792 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3794 call
= gimple_build_call_vec (fndecl
, vargs
);
3795 new_temp
= make_ssa_name (vec_dest
, call
);
3796 gimple_call_set_lhs (call
, new_temp
);
3797 gimple_call_set_nothrow (call
, true);
3798 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3799 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3804 for (i
= 0; i
< nargs
; i
++)
3806 op
= gimple_call_arg (stmt
, i
);
3809 vec_defs
.quick_push (vNULL
);
3810 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3811 op
, &vec_defs
[i
], vectypes
[i
]);
3813 vec_oprnd0
= vec_defs
[i
][2*j
];
3814 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3816 vargs
.quick_push (vec_oprnd0
);
3817 vargs
.quick_push (vec_oprnd1
);
3820 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3821 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3822 gimple_call_set_lhs (new_stmt
, new_temp
);
3823 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3825 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3829 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3831 for (i
= 0; i
< nargs
; i
++)
3833 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3834 vec_oprndsi
.release ();
3838 /* No current target implements this case. */
3843 /* The call in STMT might prevent it from being removed in dce.
3844 We however cannot remove it here, due to the way the ssa name
3845 it defines is mapped to the new definition. So just replace
3846 rhs of the statement with something harmless. */
3851 stmt_info
= vect_orig_stmt (stmt_info
);
3852 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3855 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3856 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
/* Describes one scalar argument of a call being analyzed for
   vectorization via a SIMD clone (used by vectorizable_simd_clone_call
   and vect_simd_lane_linear below).
   NOTE(review): the extraction dropped interior lines here (embedded
   numbering jumps 3862 -> 3866 and 3867 -> 3869), so some members and
   the struct's braces are missing from this view.  */
3862 struct simd_call_arg_info
/* Step of a linear argument; vect_simd_lane_linear treats a zero
   value as "not (yet) known to be linear".  */
3866 HOST_WIDE_INT linear_step
;
/* How the argument is defined (constant, external, ...).  */
3867 enum vect_def_type dt
;
/* True if the argument is linear within a simd lane only, not across
   the whole loop.  */
3869 bool simd_lane_linear
;
3872 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3873 is linear within simd lane (but not within whole loop), note it in
/* NOTE(review): extraction dropped lines in this definition (numbering
   jumps 3873 -> 3877 and the switch's case labels/returns at
   3895 -> 3898, 3915 -> 3924 are missing); verify the control flow
   against the upstream file.  */
3877 vect_simd_lane_linear (tree op
, class loop
*loop
,
3878 struct simd_call_arg_info
*arginfo
)
3880 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only handle OP = invariant_base p+ offset.  */
3882 if (!is_gimple_assign (def_stmt
)
3883 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3884 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3887 tree base
= gimple_assign_rhs1 (def_stmt
);
3888 HOST_WIDE_INT linear_step
= 0;
3889 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the SSA def chain of the offset, folding constant additions
   into BASE and recording a single constant multiplication as the
   step, until (per the call check below) we reach IFN_GOMP_SIMD_LANE.  */
3890 while (TREE_CODE (v
) == SSA_NAME
)
3893 def_stmt
= SSA_NAME_DEF_STMT (v
);
3894 if (is_gimple_assign (def_stmt
))
3895 switch (gimple_assign_rhs_code (def_stmt
))
/* Constant addition: accumulate into BASE; only valid before a
   step has been seen.  */
3898 t
= gimple_assign_rhs2 (def_stmt
);
3899 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3901 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3902 v
= gimple_assign_rhs1 (def_stmt
);
/* Multiplication by a nonzero shwi constant: that is the step;
   at most one is allowed.  */
3905 t
= gimple_assign_rhs2 (def_stmt
);
3906 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3908 linear_step
= tree_to_shwi (t
);
3909 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are only looked through when they do not narrow.  */
3912 t
= gimple_assign_rhs1 (def_stmt
);
3913 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3914 || (TYPE_PRECISION (TREE_TYPE (v
))
< TYPE_PRECISION (TREE_TYPE (t
))))
/* The chain must bottom out at IFN_GOMP_SIMD_LANE of this loop's
   simduid for OP to be simd-lane-linear.  */
3924 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3926 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3927 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Success: record the discovered step and flag the argument.  */
3932 arginfo
->linear_step
= linear_step
;
3934 arginfo
->simd_lane_linear
= true;
3940 /* Return the number of elements in vector type VECTYPE, which is associated
3941 with a SIMD clone. At present these vectors always have a constant
/* NOTE(review): the end of the comment above and the function's braces
   were dropped by the extraction (numbering jumps 3941 -> 3944 and
   3945 -> 3947).  */
3944 static unsigned HOST_WIDE_INT
3945 simd_clone_subparts (tree vectype
)
/* to_constant is safe here because, per the comment above, SIMD-clone
   vector lengths are always constant.  */
3947 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3950 /* Function vectorizable_simd_clone_call.
3952 Check if STMT_INFO performs a function call that can be vectorized
3953 by calling a simd clone of the function.
3954 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3955 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3956 Return true if STMT_INFO is vectorizable in this way. */
3959 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3960 gimple_stmt_iterator
*gsi
,
3961 gimple
**vec_stmt
, slp_tree slp_node
,
3962 stmt_vector_for_cost
*)
3967 tree vec_oprnd0
= NULL_TREE
;
3970 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3971 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3972 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3973 tree fndecl
, new_temp
;
3975 auto_vec
<simd_call_arg_info
> arginfo
;
3976 vec
<tree
> vargs
= vNULL
;
3978 tree lhs
, rtype
, ratype
;
3979 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3981 /* Is STMT a vectorizable call? */
3982 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3986 fndecl
= gimple_call_fndecl (stmt
);
3987 if (fndecl
== NULL_TREE
)
3990 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3991 if (node
== NULL
|| node
->simd_clones
== NULL
)
3994 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3997 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4001 if (gimple_call_lhs (stmt
)
4002 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4005 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4007 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4009 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4016 /* Process function arguments. */
4017 nargs
= gimple_call_num_args (stmt
);
4019 /* Bail out if the function has zero arguments. */
4023 arginfo
.reserve (nargs
, true);
4025 for (i
= 0; i
< nargs
; i
++)
4027 simd_call_arg_info thisarginfo
;
4030 thisarginfo
.linear_step
= 0;
4031 thisarginfo
.align
= 0;
4032 thisarginfo
.op
= NULL_TREE
;
4033 thisarginfo
.simd_lane_linear
= false;
4035 op
= gimple_call_arg (stmt
, i
);
4036 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4037 &thisarginfo
.vectype
)
4038 || thisarginfo
.dt
== vect_uninitialized_def
)
4040 if (dump_enabled_p ())
4041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4042 "use not simple.\n");
4046 if (thisarginfo
.dt
== vect_constant_def
4047 || thisarginfo
.dt
== vect_external_def
)
4048 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4051 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4052 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4056 "vector mask arguments are not supported\n");
4061 /* For linear arguments, the analyze phase should have saved
4062 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4063 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4064 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4066 gcc_assert (vec_stmt
);
4067 thisarginfo
.linear_step
4068 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4070 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4071 thisarginfo
.simd_lane_linear
4072 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4073 == boolean_true_node
);
4074 /* If loop has been peeled for alignment, we need to adjust it. */
4075 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4076 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4077 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4079 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4080 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4081 tree opt
= TREE_TYPE (thisarginfo
.op
);
4082 bias
= fold_convert (TREE_TYPE (step
), bias
);
4083 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4085 = fold_build2 (POINTER_TYPE_P (opt
)
4086 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4087 thisarginfo
.op
, bias
);
4091 && thisarginfo
.dt
!= vect_constant_def
4092 && thisarginfo
.dt
!= vect_external_def
4094 && TREE_CODE (op
) == SSA_NAME
4095 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4097 && tree_fits_shwi_p (iv
.step
))
4099 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4100 thisarginfo
.op
= iv
.base
;
4102 else if ((thisarginfo
.dt
== vect_constant_def
4103 || thisarginfo
.dt
== vect_external_def
)
4104 && POINTER_TYPE_P (TREE_TYPE (op
)))
4105 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4106 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4108 if (POINTER_TYPE_P (TREE_TYPE (op
))
4109 && !thisarginfo
.linear_step
4111 && thisarginfo
.dt
!= vect_constant_def
4112 && thisarginfo
.dt
!= vect_external_def
4115 && TREE_CODE (op
) == SSA_NAME
)
4116 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4118 arginfo
.quick_push (thisarginfo
);
4121 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4122 if (!vf
.is_constant ())
4124 if (dump_enabled_p ())
4125 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4126 "not considering SIMD clones; not yet supported"
4127 " for variable-width vectors.\n");
4131 unsigned int badness
= 0;
4132 struct cgraph_node
*bestn
= NULL
;
4133 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4134 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4136 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4137 n
= n
->simdclone
->next_clone
)
4139 unsigned int this_badness
= 0;
4140 unsigned int num_calls
;
4141 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4142 || n
->simdclone
->nargs
!= nargs
)
4145 this_badness
+= exact_log2 (num_calls
) * 4096;
4146 if (n
->simdclone
->inbranch
)
4147 this_badness
+= 8192;
4148 int target_badness
= targetm
.simd_clone
.usable (n
);
4149 if (target_badness
< 0)
4151 this_badness
+= target_badness
* 512;
4152 /* FORNOW: Have to add code to add the mask argument. */
4153 if (n
->simdclone
->inbranch
)
4155 for (i
= 0; i
< nargs
; i
++)
4157 switch (n
->simdclone
->args
[i
].arg_type
)
4159 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4160 if (!useless_type_conversion_p
4161 (n
->simdclone
->args
[i
].orig_type
,
4162 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4164 else if (arginfo
[i
].dt
== vect_constant_def
4165 || arginfo
[i
].dt
== vect_external_def
4166 || arginfo
[i
].linear_step
)
4169 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4170 if (arginfo
[i
].dt
!= vect_constant_def
4171 && arginfo
[i
].dt
!= vect_external_def
)
4174 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4175 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4176 if (arginfo
[i
].dt
== vect_constant_def
4177 || arginfo
[i
].dt
== vect_external_def
4178 || (arginfo
[i
].linear_step
4179 != n
->simdclone
->args
[i
].linear_step
))
4182 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4191 case SIMD_CLONE_ARG_TYPE_MASK
:
4194 if (i
== (size_t) -1)
4196 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4201 if (arginfo
[i
].align
)
4202 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4203 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4205 if (i
== (size_t) -1)
4207 if (bestn
== NULL
|| this_badness
< badness
)
4210 badness
= this_badness
;
4217 for (i
= 0; i
< nargs
; i
++)
4218 if ((arginfo
[i
].dt
== vect_constant_def
4219 || arginfo
[i
].dt
== vect_external_def
)
4220 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4222 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4223 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4225 if (arginfo
[i
].vectype
== NULL
4226 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4227 simd_clone_subparts (arginfo
[i
].vectype
)))
4231 fndecl
= bestn
->decl
;
4232 nunits
= bestn
->simdclone
->simdlen
;
4233 ncopies
= vector_unroll_factor (vf
, nunits
);
4235 /* If the function isn't const, only allow it in simd loops where user
4236 has asserted that at least nunits consecutive iterations can be
4237 performed using SIMD instructions. */
4238 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4239 && gimple_vuse (stmt
))
4242 /* Sanity check: make sure that at least one copy of the vectorized stmt
4243 needs to be generated. */
4244 gcc_assert (ncopies
>= 1);
4246 if (!vec_stmt
) /* transformation not required. */
4248 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4249 for (i
= 0; i
< nargs
; i
++)
4250 if ((bestn
->simdclone
->args
[i
].arg_type
4251 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4252 || (bestn
->simdclone
->args
[i
].arg_type
4253 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4255 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4258 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4259 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4260 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4261 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4262 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4263 tree sll
= arginfo
[i
].simd_lane_linear
4264 ? boolean_true_node
: boolean_false_node
;
4265 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4267 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4268 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4269 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4270 dt, slp_node, cost_vec); */
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4280 scalar_dest
= gimple_call_lhs (stmt
);
4281 vec_dest
= NULL_TREE
;
4286 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4287 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4288 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4291 rtype
= TREE_TYPE (ratype
);
4295 auto_vec
<vec
<tree
> > vec_oprnds
;
4296 auto_vec
<unsigned> vec_oprnds_i
;
4297 vec_oprnds
.safe_grow_cleared (nargs
, true);
4298 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4299 for (j
= 0; j
< ncopies
; ++j
)
4301 /* Build argument list for the vectorized call. */
4303 vargs
.create (nargs
);
4307 for (i
= 0; i
< nargs
; i
++)
4309 unsigned int k
, l
, m
, o
;
4311 op
= gimple_call_arg (stmt
, i
);
4312 switch (bestn
->simdclone
->args
[i
].arg_type
)
4314 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4315 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4316 o
= vector_unroll_factor (nunits
,
4317 simd_clone_subparts (atype
));
4318 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4320 if (simd_clone_subparts (atype
)
4321 < simd_clone_subparts (arginfo
[i
].vectype
))
4323 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4324 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4325 / simd_clone_subparts (atype
));
4326 gcc_assert ((k
& (k
- 1)) == 0);
4329 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4330 ncopies
* o
/ k
, op
,
4332 vec_oprnds_i
[i
] = 0;
4333 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4337 vec_oprnd0
= arginfo
[i
].op
;
4338 if ((m
& (k
- 1)) == 0)
4339 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4341 arginfo
[i
].op
= vec_oprnd0
;
4343 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4345 bitsize_int ((m
& (k
- 1)) * prec
));
4347 = gimple_build_assign (make_ssa_name (atype
),
4349 vect_finish_stmt_generation (vinfo
, stmt_info
,
4351 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4355 k
= (simd_clone_subparts (atype
)
4356 / simd_clone_subparts (arginfo
[i
].vectype
));
4357 gcc_assert ((k
& (k
- 1)) == 0);
4358 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4360 vec_alloc (ctor_elts
, k
);
4363 for (l
= 0; l
< k
; l
++)
4365 if (m
== 0 && l
== 0)
4367 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4371 vec_oprnds_i
[i
] = 0;
4372 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4375 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4376 arginfo
[i
].op
= vec_oprnd0
;
4379 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4383 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4387 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4389 = gimple_build_assign (make_ssa_name (atype
),
4391 vect_finish_stmt_generation (vinfo
, stmt_info
,
4393 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4396 vargs
.safe_push (vec_oprnd0
);
4399 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4401 = gimple_build_assign (make_ssa_name (atype
),
4403 vect_finish_stmt_generation (vinfo
, stmt_info
,
4405 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4410 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4411 vargs
.safe_push (op
);
4413 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4414 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4419 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4420 &stmts
, true, NULL_TREE
);
4424 edge pe
= loop_preheader_edge (loop
);
4425 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4426 gcc_assert (!new_bb
);
4428 if (arginfo
[i
].simd_lane_linear
)
4430 vargs
.safe_push (arginfo
[i
].op
);
4433 tree phi_res
= copy_ssa_name (op
);
4434 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4435 add_phi_arg (new_phi
, arginfo
[i
].op
,
4436 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4438 = POINTER_TYPE_P (TREE_TYPE (op
))
4439 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4440 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4441 ? sizetype
: TREE_TYPE (op
);
4443 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4445 tree tcst
= wide_int_to_tree (type
, cst
);
4446 tree phi_arg
= copy_ssa_name (op
);
4448 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4449 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4450 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4451 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4453 arginfo
[i
].op
= phi_res
;
4454 vargs
.safe_push (phi_res
);
4459 = POINTER_TYPE_P (TREE_TYPE (op
))
4460 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4461 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4462 ? sizetype
: TREE_TYPE (op
);
4464 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4466 tree tcst
= wide_int_to_tree (type
, cst
);
4467 new_temp
= make_ssa_name (TREE_TYPE (op
));
4469 = gimple_build_assign (new_temp
, code
,
4470 arginfo
[i
].op
, tcst
);
4471 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4472 vargs
.safe_push (new_temp
);
4475 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4476 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4477 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4478 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4479 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4480 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4486 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4490 || known_eq (simd_clone_subparts (rtype
), nunits
));
4492 new_temp
= create_tmp_var (ratype
);
4493 else if (useless_type_conversion_p (vectype
, rtype
))
4494 new_temp
= make_ssa_name (vec_dest
, new_call
);
4496 new_temp
= make_ssa_name (rtype
, new_call
);
4497 gimple_call_set_lhs (new_call
, new_temp
);
4499 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4500 gimple
*new_stmt
= new_call
;
4504 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4507 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4508 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4509 k
= vector_unroll_factor (nunits
,
4510 simd_clone_subparts (vectype
));
4511 gcc_assert ((k
& (k
- 1)) == 0);
4512 for (l
= 0; l
< k
; l
++)
4517 t
= build_fold_addr_expr (new_temp
);
4518 t
= build2 (MEM_REF
, vectype
, t
,
4519 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4522 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4523 bitsize_int (prec
), bitsize_int (l
* prec
));
4524 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4525 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4527 if (j
== 0 && l
== 0)
4528 *vec_stmt
= new_stmt
;
4529 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4533 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4536 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4538 unsigned int k
= (simd_clone_subparts (vectype
)
4539 / simd_clone_subparts (rtype
));
4540 gcc_assert ((k
& (k
- 1)) == 0);
4541 if ((j
& (k
- 1)) == 0)
4542 vec_alloc (ret_ctor_elts
, k
);
4546 o
= vector_unroll_factor (nunits
,
4547 simd_clone_subparts (rtype
));
4548 for (m
= 0; m
< o
; m
++)
4550 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4551 size_int (m
), NULL_TREE
, NULL_TREE
);
4552 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4554 vect_finish_stmt_generation (vinfo
, stmt_info
,
4556 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4557 gimple_assign_lhs (new_stmt
));
4559 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4562 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4563 if ((j
& (k
- 1)) != k
- 1)
4565 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4567 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4568 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4570 if ((unsigned) j
== k
- 1)
4571 *vec_stmt
= new_stmt
;
4572 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4577 tree t
= build_fold_addr_expr (new_temp
);
4578 t
= build2 (MEM_REF
, vectype
, t
,
4579 build_int_cst (TREE_TYPE (t
), 0));
4580 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4581 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4582 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4584 else if (!useless_type_conversion_p (vectype
, rtype
))
4586 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4588 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4589 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4594 *vec_stmt
= new_stmt
;
4595 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4598 for (i
= 0; i
< nargs
; ++i
)
4600 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4605 /* The call in STMT might prevent it from being removed in dce.
4606 We however cannot remove it here, due to the way the ssa name
4607 it defines is mapped to the new definition. So just replace
4608 rhs of the statement with something harmless. */
4616 type
= TREE_TYPE (scalar_dest
);
4617 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4618 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4621 new_stmt
= gimple_build_nop ();
4622 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4623 unlink_stmt_vdef (stmt
);
4629 /* Function vect_gen_widened_results_half
4631 Create a vector stmt whose code, type, number of arguments, and result
4632 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4633 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4634 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4635 needs to be created (DECL is a function-decl of a target-builtin).
4636 STMT_INFO is the original scalar stmt that we are vectorizing. */
4639 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4640 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4641 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4642 stmt_vec_info stmt_info
)
4647 /* Generate half of the widened result: */
4648 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4649 if (op_type
!= binary_op
)
4651 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4652 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4653 gimple_assign_set_lhs (new_stmt
, new_temp
);
4654 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4660 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4661 For multi-step conversions store the resulting vectors and call the function
4665 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4667 stmt_vec_info stmt_info
,
4668 vec
<tree
> &vec_dsts
,
4669 gimple_stmt_iterator
*gsi
,
4670 slp_tree slp_node
, enum tree_code code
)
4673 tree vop0
, vop1
, new_tmp
, vec_dest
;
4675 vec_dest
= vec_dsts
.pop ();
4677 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4679 /* Create demotion operation. */
4680 vop0
= (*vec_oprnds
)[i
];
4681 vop1
= (*vec_oprnds
)[i
+ 1];
4682 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4683 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4684 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4685 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4688 /* Store the resulting vector for next recursive call. */
4689 (*vec_oprnds
)[i
/2] = new_tmp
;
4692 /* This is the last step of the conversion sequence. Store the
4693 vectors in SLP_NODE or in vector info of the scalar statement
4694 (or in STMT_VINFO_RELATED_STMT chain). */
4696 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4698 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4702 /* For multi-step demotion operations we first generate demotion operations
4703 from the source type to the intermediate types, and then combine the
4704 results (stored in VEC_OPRNDS) in demotion operation to the destination
4708 /* At each level of recursion we have half of the operands we had at the
4710 vec_oprnds
->truncate ((i
+1)/2);
4711 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4713 stmt_info
, vec_dsts
, gsi
,
4714 slp_node
, VEC_PACK_TRUNC_EXPR
);
4717 vec_dsts
.quick_push (vec_dest
);
4721 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4722 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4723 STMT_INFO. For multi-step conversions store the resulting vectors and
4724 call the function recursively. */
4727 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4728 vec
<tree
> *vec_oprnds0
,
4729 vec
<tree
> *vec_oprnds1
,
4730 stmt_vec_info stmt_info
, tree vec_dest
,
4731 gimple_stmt_iterator
*gsi
,
4732 enum tree_code code1
,
4733 enum tree_code code2
, int op_type
)
4736 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4737 gimple
*new_stmt1
, *new_stmt2
;
4738 vec
<tree
> vec_tmp
= vNULL
;
4740 vec_tmp
.create (vec_oprnds0
->length () * 2);
4741 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4743 if (op_type
== binary_op
)
4744 vop1
= (*vec_oprnds1
)[i
];
4748 /* Generate the two halves of promotion operation. */
4749 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4750 op_type
, vec_dest
, gsi
,
4752 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4753 op_type
, vec_dest
, gsi
,
4755 if (is_gimple_call (new_stmt1
))
4757 new_tmp1
= gimple_call_lhs (new_stmt1
);
4758 new_tmp2
= gimple_call_lhs (new_stmt2
);
4762 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4763 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4766 /* Store the results for the next step. */
4767 vec_tmp
.quick_push (new_tmp1
);
4768 vec_tmp
.quick_push (new_tmp2
);
4771 vec_oprnds0
->release ();
4772 *vec_oprnds0
= vec_tmp
;
4775 /* Create vectorized promotion stmts for widening stmts using only half the
4776 potential vector size for input. */
4778 vect_create_half_widening_stmts (vec_info
*vinfo
,
4779 vec
<tree
> *vec_oprnds0
,
4780 vec
<tree
> *vec_oprnds1
,
4781 stmt_vec_info stmt_info
, tree vec_dest
,
4782 gimple_stmt_iterator
*gsi
,
4783 enum tree_code code1
,
4791 vec
<tree
> vec_tmp
= vNULL
;
4793 vec_tmp
.create (vec_oprnds0
->length ());
4794 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4796 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
4798 gcc_assert (op_type
== binary_op
);
4799 vop1
= (*vec_oprnds1
)[i
];
4801 /* Widen the first vector input. */
4802 out_type
= TREE_TYPE (vec_dest
);
4803 new_tmp1
= make_ssa_name (out_type
);
4804 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4805 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
4806 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4808 /* Widen the second vector input. */
4809 new_tmp2
= make_ssa_name (out_type
);
4810 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4812 /* Perform the operation. With both vector inputs widened. */
4813 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4817 /* Perform the operation. With the single vector input widened. */
4818 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4821 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4822 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4823 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4825 /* Store the results for the next step. */
4826 vec_tmp
.quick_push (new_tmp3
);
4829 vec_oprnds0
->release ();
4830 *vec_oprnds0
= vec_tmp
;
4834 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4835 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4836 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4837 Return true if STMT_INFO is vectorizable in this way. */
4840 vectorizable_conversion (vec_info
*vinfo
,
4841 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4842 gimple
**vec_stmt
, slp_tree slp_node
,
4843 stmt_vector_for_cost
*cost_vec
)
4847 tree op0
, op1
= NULL_TREE
;
4848 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4849 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4850 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4852 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4854 poly_uint64 nunits_in
;
4855 poly_uint64 nunits_out
;
4856 tree vectype_out
, vectype_in
;
4858 tree lhs_type
, rhs_type
;
4859 enum { NARROW
, NONE
, WIDEN
} modifier
;
4860 vec
<tree
> vec_oprnds0
= vNULL
;
4861 vec
<tree
> vec_oprnds1
= vNULL
;
4863 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4864 int multi_step_cvt
= 0;
4865 vec
<tree
> interm_types
= vNULL
;
4866 tree intermediate_type
, cvt_type
= NULL_TREE
;
4868 unsigned short fltsz
;
4870 /* Is STMT a vectorizable conversion? */
4872 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4875 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4879 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4883 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4886 code
= gimple_assign_rhs_code (stmt
);
4887 if (!CONVERT_EXPR_CODE_P (code
)
4888 && code
!= FIX_TRUNC_EXPR
4889 && code
!= FLOAT_EXPR
4890 && code
!= WIDEN_PLUS_EXPR
4891 && code
!= WIDEN_MINUS_EXPR
4892 && code
!= WIDEN_MULT_EXPR
4893 && code
!= WIDEN_LSHIFT_EXPR
)
4896 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4897 || code
== WIDEN_MINUS_EXPR
4898 || code
== WIDEN_MULT_EXPR
4899 || code
== WIDEN_LSHIFT_EXPR
);
4900 op_type
= TREE_CODE_LENGTH (code
);
4902 /* Check types of lhs and rhs. */
4903 scalar_dest
= gimple_assign_lhs (stmt
);
4904 lhs_type
= TREE_TYPE (scalar_dest
);
4905 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4907 /* Check the operands of the operation. */
4908 slp_tree slp_op0
, slp_op1
= NULL
;
4909 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4910 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4912 if (dump_enabled_p ())
4913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4914 "use not simple.\n");
4918 rhs_type
= TREE_TYPE (op0
);
4919 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4920 && !((INTEGRAL_TYPE_P (lhs_type
)
4921 && INTEGRAL_TYPE_P (rhs_type
))
4922 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4923 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4926 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4927 && ((INTEGRAL_TYPE_P (lhs_type
)
4928 && !type_has_mode_precision_p (lhs_type
))
4929 || (INTEGRAL_TYPE_P (rhs_type
)
4930 && !type_has_mode_precision_p (rhs_type
))))
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4934 "type conversion to/from bit-precision unsupported."
4939 if (op_type
== binary_op
)
4941 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4942 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4944 op1
= gimple_assign_rhs2 (stmt
);
4946 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4947 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4949 if (dump_enabled_p ())
4950 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4951 "use not simple.\n");
4954 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4957 vectype_in
= vectype1_in
;
4960 /* If op0 is an external or constant def, infer the vector type
4961 from the scalar type. */
4963 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4965 gcc_assert (vectype_in
);
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4970 "no vectype for scalar type %T\n", rhs_type
);
4975 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4976 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4980 "can't convert between boolean and non "
4981 "boolean vectors %T\n", rhs_type
);
4986 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4987 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4988 if (known_eq (nunits_out
, nunits_in
))
4993 else if (multiple_p (nunits_out
, nunits_in
))
4997 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5001 /* Multiple types in SLP are handled by creating the appropriate number of
5002 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5006 else if (modifier
== NARROW
)
5007 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5009 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5011 /* Sanity check: make sure that at least one copy of the vectorized stmt
5012 needs to be generated. */
5013 gcc_assert (ncopies
>= 1);
5015 bool found_mode
= false;
5016 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5017 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5018 opt_scalar_mode rhs_mode_iter
;
5020 /* Supportable by target? */
5024 if (code
!= FIX_TRUNC_EXPR
5025 && code
!= FLOAT_EXPR
5026 && !CONVERT_EXPR_CODE_P (code
))
5028 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5034 "conversion not supported by target.\n");
5038 if (known_eq (nunits_in
, nunits_out
))
5040 if (!supportable_half_widening_operation (code
, vectype_out
,
5041 vectype_in
, &code1
))
5043 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5046 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5047 vectype_out
, vectype_in
, &code1
,
5048 &code2
, &multi_step_cvt
,
5051 /* Binary widening operation can only be supported directly by the
5053 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5057 if (code
!= FLOAT_EXPR
5058 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5061 fltsz
= GET_MODE_SIZE (lhs_mode
);
5062 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5064 rhs_mode
= rhs_mode_iter
.require ();
5065 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5069 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5070 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5071 if (cvt_type
== NULL_TREE
)
5074 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5076 if (!supportable_convert_operation (code
, vectype_out
,
5077 cvt_type
, &codecvt1
))
5080 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5081 vectype_out
, cvt_type
,
5082 &codecvt1
, &codecvt2
,
5087 gcc_assert (multi_step_cvt
== 0);
5089 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5091 vectype_in
, &code1
, &code2
,
5092 &multi_step_cvt
, &interm_types
))
5102 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5103 codecvt2
= ERROR_MARK
;
5107 interm_types
.safe_push (cvt_type
);
5108 cvt_type
= NULL_TREE
;
5113 gcc_assert (op_type
== unary_op
);
5114 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5115 &code1
, &multi_step_cvt
,
5119 if (code
!= FIX_TRUNC_EXPR
5120 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5124 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5125 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5126 if (cvt_type
== NULL_TREE
)
5128 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5131 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5132 &code1
, &multi_step_cvt
,
5141 if (!vec_stmt
) /* transformation not required. */
5144 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5145 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5147 if (dump_enabled_p ())
5148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5149 "incompatible vector types for invariants\n");
5152 DUMP_VECT_SCOPE ("vectorizable_conversion");
5153 if (modifier
== NONE
)
5155 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5156 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5159 else if (modifier
== NARROW
)
5161 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5162 /* The final packing step produces one vector result per copy. */
5163 unsigned int nvectors
5164 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5165 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5166 multi_step_cvt
, cost_vec
,
5171 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5172 /* The initial unpacking step produces two vector results
5173 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5174 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5175 unsigned int nvectors
5177 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5179 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5180 multi_step_cvt
, cost_vec
,
5183 interm_types
.release ();
5188 if (dump_enabled_p ())
5189 dump_printf_loc (MSG_NOTE
, vect_location
,
5190 "transform conversion. ncopies = %d.\n", ncopies
);
5192 if (op_type
== binary_op
)
5194 if (CONSTANT_CLASS_P (op0
))
5195 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5196 else if (CONSTANT_CLASS_P (op1
))
5197 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5200 /* In case of multi-step conversion, we first generate conversion operations
5201 to the intermediate types, and then from that types to the final one.
5202 We create vector destinations for the intermediate type (TYPES) received
5203 from supportable_*_operation, and store them in the correct order
5204 for future use in vect_create_vectorized_*_stmts (). */
5205 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5206 vec_dest
= vect_create_destination_var (scalar_dest
,
5207 (cvt_type
&& modifier
== WIDEN
)
5208 ? cvt_type
: vectype_out
);
5209 vec_dsts
.quick_push (vec_dest
);
5213 for (i
= interm_types
.length () - 1;
5214 interm_types
.iterate (i
, &intermediate_type
); i
--)
5216 vec_dest
= vect_create_destination_var (scalar_dest
,
5218 vec_dsts
.quick_push (vec_dest
);
5223 vec_dest
= vect_create_destination_var (scalar_dest
,
5225 ? vectype_out
: cvt_type
);
5230 if (modifier
== WIDEN
)
5232 else if (modifier
== NARROW
)
5235 ninputs
= vect_pow2 (multi_step_cvt
);
5243 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5245 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5247 /* Arguments are ready, create the new vector stmt. */
5248 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5249 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5250 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5251 gimple_assign_set_lhs (new_stmt
, new_temp
);
5252 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5255 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5257 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5262 /* In case the vectorization factor (VF) is bigger than the number
5263 of elements that we can fit in a vectype (nunits), we have to
5264 generate more than one vector stmt - i.e - we need to "unroll"
5265 the vector stmt by a factor VF/nunits. */
5266 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5268 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5270 if (code
== WIDEN_LSHIFT_EXPR
)
5272 int oprnds_size
= vec_oprnds0
.length ();
5273 vec_oprnds1
.create (oprnds_size
);
5274 for (i
= 0; i
< oprnds_size
; ++i
)
5275 vec_oprnds1
.quick_push (op1
);
5277 /* Arguments are ready. Create the new vector stmts. */
5278 for (i
= multi_step_cvt
; i
>= 0; i
--)
5280 tree this_dest
= vec_dsts
[i
];
5281 enum tree_code c1
= code1
, c2
= code2
;
5282 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5287 if (known_eq (nunits_out
, nunits_in
))
5288 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5289 &vec_oprnds1
, stmt_info
,
5293 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5294 &vec_oprnds1
, stmt_info
,
5299 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5304 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5305 new_temp
= make_ssa_name (vec_dest
);
5306 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5307 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5310 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5313 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5315 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5320 /* In case the vectorization factor (VF) is bigger than the number
5321 of elements that we can fit in a vectype (nunits), we have to
5322 generate more than one vector stmt - i.e - we need to "unroll"
5323 the vector stmt by a factor VF/nunits. */
5324 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5326 /* Arguments are ready. Create the new vector stmts. */
5328 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5330 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5331 new_temp
= make_ssa_name (vec_dest
);
5333 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5334 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5335 vec_oprnds0
[i
] = new_temp
;
5338 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5340 stmt_info
, vec_dsts
, gsi
,
5345 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5347 vec_oprnds0
.release ();
5348 vec_oprnds1
.release ();
5349 interm_types
.release ();
5354 /* Return true if we can assume from the scalar form of STMT_INFO that
5355 neither the scalar nor the vector forms will generate code. STMT_INFO
5356 is known not to involve a data reference. */
5359 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5361 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5365 tree lhs
= gimple_assign_lhs (stmt
);
5366 tree_code code
= gimple_assign_rhs_code (stmt
);
5367 tree rhs
= gimple_assign_rhs1 (stmt
);
5369 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5372 if (CONVERT_EXPR_CODE_P (code
))
5373 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5378 /* Function vectorizable_assignment.
5380 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5381 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5382 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5383 Return true if STMT_INFO is vectorizable in this way. */
5386 vectorizable_assignment (vec_info
*vinfo
,
5387 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5388 gimple
**vec_stmt
, slp_tree slp_node
,
5389 stmt_vector_for_cost
*cost_vec
)
5394 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5396 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5400 vec
<tree
> vec_oprnds
= vNULL
;
5402 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5403 enum tree_code code
;
5406 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5409 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5413 /* Is vectorizable assignment? */
5414 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5418 scalar_dest
= gimple_assign_lhs (stmt
);
5419 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5422 if (STMT_VINFO_DATA_REF (stmt_info
))
5425 code
= gimple_assign_rhs_code (stmt
);
5426 if (!(gimple_assign_single_p (stmt
)
5427 || code
== PAREN_EXPR
5428 || CONVERT_EXPR_CODE_P (code
)))
5431 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5432 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5434 /* Multiple types in SLP are handled by creating the appropriate number of
5435 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5440 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5442 gcc_assert (ncopies
>= 1);
5445 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5446 &dt
[0], &vectype_in
))
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5450 "use not simple.\n");
5454 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5456 /* We can handle NOP_EXPR conversions that do not change the number
5457 of elements or the vector size. */
5458 if ((CONVERT_EXPR_CODE_P (code
)
5459 || code
== VIEW_CONVERT_EXPR
)
5461 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5462 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5463 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5466 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5467 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5469 if (dump_enabled_p ())
5470 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5471 "can't convert between boolean and non "
5472 "boolean vectors %T\n", TREE_TYPE (op
));
5477 /* We do not handle bit-precision changes. */
5478 if ((CONVERT_EXPR_CODE_P (code
)
5479 || code
== VIEW_CONVERT_EXPR
)
5480 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5481 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5482 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5483 /* But a conversion that does not change the bit-pattern is ok. */
5484 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5485 > TYPE_PRECISION (TREE_TYPE (op
)))
5486 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5488 if (dump_enabled_p ())
5489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5490 "type conversion to/from bit-precision "
5495 if (!vec_stmt
) /* transformation not required. */
5498 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5500 if (dump_enabled_p ())
5501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5502 "incompatible vector types for invariants\n");
5505 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5506 DUMP_VECT_SCOPE ("vectorizable_assignment");
5507 if (!vect_nop_conversion_p (stmt_info
))
5508 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5514 if (dump_enabled_p ())
5515 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5518 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5521 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5523 /* Arguments are ready. create the new vector stmt. */
5524 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5526 if (CONVERT_EXPR_CODE_P (code
)
5527 || code
== VIEW_CONVERT_EXPR
)
5528 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5529 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5530 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5531 gimple_assign_set_lhs (new_stmt
, new_temp
);
5532 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5534 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5536 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5539 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5541 vec_oprnds
.release ();
5546 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5547 either as shift by a scalar or by a vector. */
5550 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5553 machine_mode vec_mode
;
5558 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5562 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5564 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5566 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5568 || (optab_handler (optab
, TYPE_MODE (vectype
))
5569 == CODE_FOR_nothing
))
5573 vec_mode
= TYPE_MODE (vectype
);
5574 icode
= (int) optab_handler (optab
, vec_mode
);
5575 if (icode
== CODE_FOR_nothing
)
5582 /* Function vectorizable_shift.
5584 Check if STMT_INFO performs a shift operation that can be vectorized.
5585 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5586 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5587 Return true if STMT_INFO is vectorizable in this way. */
5590 vectorizable_shift (vec_info
*vinfo
,
5591 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5592 gimple
**vec_stmt
, slp_tree slp_node
,
5593 stmt_vector_for_cost
*cost_vec
)
5597 tree op0
, op1
= NULL
;
5598 tree vec_oprnd1
= NULL_TREE
;
5600 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5601 enum tree_code code
;
5602 machine_mode vec_mode
;
5606 machine_mode optab_op2_mode
;
5607 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5609 poly_uint64 nunits_in
;
5610 poly_uint64 nunits_out
;
5615 vec
<tree
> vec_oprnds0
= vNULL
;
5616 vec
<tree
> vec_oprnds1
= vNULL
;
5619 bool scalar_shift_arg
= true;
5620 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5621 bool incompatible_op1_vectype_p
= false;
5623 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5626 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5627 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5631 /* Is STMT a vectorizable binary/unary operation? */
5632 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5636 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5639 code
= gimple_assign_rhs_code (stmt
);
5641 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5642 || code
== RROTATE_EXPR
))
5645 scalar_dest
= gimple_assign_lhs (stmt
);
5646 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5647 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5649 if (dump_enabled_p ())
5650 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5651 "bit-precision shifts not supported.\n");
5656 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5657 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5659 if (dump_enabled_p ())
5660 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5661 "use not simple.\n");
5664 /* If op0 is an external or constant def, infer the vector type
5665 from the scalar type. */
5667 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5669 gcc_assert (vectype
);
5672 if (dump_enabled_p ())
5673 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5674 "no vectype for scalar type\n");
5678 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5679 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5680 if (maybe_ne (nunits_out
, nunits_in
))
5683 stmt_vec_info op1_def_stmt_info
;
5685 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5686 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5690 "use not simple.\n");
5694 /* Multiple types in SLP are handled by creating the appropriate number of
5695 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5700 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5702 gcc_assert (ncopies
>= 1);
5704 /* Determine whether the shift amount is a vector, or scalar. If the
5705 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5707 if ((dt
[1] == vect_internal_def
5708 || dt
[1] == vect_induction_def
5709 || dt
[1] == vect_nested_cycle
)
5711 scalar_shift_arg
= false;
5712 else if (dt
[1] == vect_constant_def
5713 || dt
[1] == vect_external_def
5714 || dt
[1] == vect_internal_def
)
5716 /* In SLP, need to check whether the shift count is the same,
5717 in loops if it is a constant or invariant, it is always
5721 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5722 stmt_vec_info slpstmt_info
;
5724 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5726 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5727 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5728 scalar_shift_arg
= false;
5731 /* For internal SLP defs we have to make sure we see scalar stmts
5732 for all vector elements.
5733 ??? For different vectors we could resort to a different
5734 scalar shift operand but code-generation below simply always
5736 if (dt
[1] == vect_internal_def
5737 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5739 scalar_shift_arg
= false;
5742 /* If the shift amount is computed by a pattern stmt we cannot
5743 use the scalar amount directly thus give up and use a vector
5745 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5746 scalar_shift_arg
= false;
5750 if (dump_enabled_p ())
5751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5752 "operand mode requires invariant argument.\n");
5756 /* Vector shifted by vector. */
5757 bool was_scalar_shift_arg
= scalar_shift_arg
;
5758 if (!scalar_shift_arg
)
5760 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_NOTE
, vect_location
,
5763 "vector/vector shift/rotate found.\n");
5766 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5768 incompatible_op1_vectype_p
5769 = (op1_vectype
== NULL_TREE
5770 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5771 TYPE_VECTOR_SUBPARTS (vectype
))
5772 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5773 if (incompatible_op1_vectype_p
5775 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5776 || slp_op1
->refcnt
!= 1))
5778 if (dump_enabled_p ())
5779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5780 "unusable type for last operand in"
5781 " vector/vector shift/rotate.\n");
5785 /* See if the machine has a vector shifted by scalar insn and if not
5786 then see if it has a vector shifted by vector insn. */
5789 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5791 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_NOTE
, vect_location
,
5795 "vector/scalar shift/rotate found.\n");
5799 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5801 && (optab_handler (optab
, TYPE_MODE (vectype
))
5802 != CODE_FOR_nothing
))
5804 scalar_shift_arg
= false;
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_NOTE
, vect_location
,
5808 "vector/vector shift/rotate found.\n");
5811 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5815 /* Unlike the other binary operators, shifts/rotates have
5816 the rhs being int, instead of the same type as the lhs,
5817 so make sure the scalar is the right type if we are
5818 dealing with vectors of long long/long/short/char. */
5819 incompatible_op1_vectype_p
5821 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5823 if (incompatible_op1_vectype_p
5824 && dt
[1] == vect_internal_def
)
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5828 "unusable type for last operand in"
5829 " vector/vector shift/rotate.\n");
5836 /* Supportable by target? */
5839 if (dump_enabled_p ())
5840 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5844 vec_mode
= TYPE_MODE (vectype
);
5845 icode
= (int) optab_handler (optab
, vec_mode
);
5846 if (icode
== CODE_FOR_nothing
)
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5850 "op not supported by target.\n");
5853 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5854 if (vect_emulated_vector_p (vectype
))
5857 if (!vec_stmt
) /* transformation not required. */
5860 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5861 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5862 && (!incompatible_op1_vectype_p
5863 || dt
[1] == vect_constant_def
)
5864 && !vect_maybe_update_slp_op_vectype
5866 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5868 if (dump_enabled_p ())
5869 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5870 "incompatible vector types for invariants\n");
5873 /* Now adjust the constant shift amount in place. */
5875 && incompatible_op1_vectype_p
5876 && dt
[1] == vect_constant_def
)
5878 for (unsigned i
= 0;
5879 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5881 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5882 = fold_convert (TREE_TYPE (vectype
),
5883 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5884 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5888 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5889 DUMP_VECT_SCOPE ("vectorizable_shift");
5890 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5891 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5897 if (dump_enabled_p ())
5898 dump_printf_loc (MSG_NOTE
, vect_location
,
5899 "transform binary/unary operation.\n");
5901 if (incompatible_op1_vectype_p
&& !slp_node
)
5903 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5904 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5905 if (dt
[1] != vect_constant_def
)
5906 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5907 TREE_TYPE (vectype
), NULL
);
5911 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5913 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
5915 /* Vector shl and shr insn patterns can be defined with scalar
5916 operand 2 (shift operand). In this case, use constant or loop
5917 invariant op1 directly, without extending it to vector mode
5919 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5920 if (!VECTOR_MODE_P (optab_op2_mode
))
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_NOTE
, vect_location
,
5924 "operand 1 using scalar mode.\n");
5926 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5927 vec_oprnds1
.quick_push (vec_oprnd1
);
5928 /* Store vec_oprnd1 for every vector stmt to be created.
5929 We check during the analysis that all the shift arguments
5931 TODO: Allow different constants for different vector
5932 stmts generated for an SLP instance. */
5934 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5935 vec_oprnds1
.quick_push (vec_oprnd1
);
5938 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5940 if (was_scalar_shift_arg
)
5942 /* If the argument was the same in all lanes create
5943 the correctly typed vector shift amount directly. */
5944 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5945 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5946 !loop_vinfo
? gsi
: NULL
);
5947 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5948 !loop_vinfo
? gsi
: NULL
);
5949 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5950 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5951 vec_oprnds1
.quick_push (vec_oprnd1
);
5953 else if (dt
[1] == vect_constant_def
)
5954 /* The constant shift amount has been adjusted in place. */
5957 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5960 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5961 (a special case for certain kind of vector shifts); otherwise,
5962 operand 1 should be of a vector type (the usual case). */
5963 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5965 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5967 /* Arguments are ready. Create the new vector stmt. */
5968 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5970 /* For internal defs where we need to use a scalar shift arg
5971 extract the first lane. */
5972 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5974 vop1
= vec_oprnds1
[0];
5975 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5977 = gimple_build_assign (new_temp
,
5978 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5980 TYPE_SIZE (TREE_TYPE (new_temp
)),
5981 bitsize_zero_node
));
5982 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5986 vop1
= vec_oprnds1
[i
];
5987 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5988 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5989 gimple_assign_set_lhs (new_stmt
, new_temp
);
5990 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5992 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5994 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5998 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6000 vec_oprnds0
.release ();
6001 vec_oprnds1
.release ();
6007 /* Function vectorizable_operation.
6009 Check if STMT_INFO performs a binary, unary or ternary operation that can
6011 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6012 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6013 Return true if STMT_INFO is vectorizable in this way. */
6016 vectorizable_operation (vec_info
*vinfo
,
6017 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6018 gimple
**vec_stmt
, slp_tree slp_node
,
6019 stmt_vector_for_cost
*cost_vec
)
6023 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6025 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6026 enum tree_code code
, orig_code
;
6027 machine_mode vec_mode
;
6031 bool target_support_p
;
6032 enum vect_def_type dt
[3]
6033 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6035 poly_uint64 nunits_in
;
6036 poly_uint64 nunits_out
;
6038 int ncopies
, vec_num
;
6040 vec
<tree
> vec_oprnds0
= vNULL
;
6041 vec
<tree
> vec_oprnds1
= vNULL
;
6042 vec
<tree
> vec_oprnds2
= vNULL
;
6043 tree vop0
, vop1
, vop2
;
6044 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6046 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6049 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6053 /* Is STMT a vectorizable binary/unary operation? */
6054 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6058 /* Loads and stores are handled in vectorizable_{load,store}. */
6059 if (STMT_VINFO_DATA_REF (stmt_info
))
6062 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6064 /* Shifts are handled in vectorizable_shift. */
6065 if (code
== LSHIFT_EXPR
6066 || code
== RSHIFT_EXPR
6067 || code
== LROTATE_EXPR
6068 || code
== RROTATE_EXPR
)
6071 /* Comparisons are handled in vectorizable_comparison. */
6072 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6075 /* Conditions are handled in vectorizable_condition. */
6076 if (code
== COND_EXPR
)
6079 /* For pointer addition and subtraction, we should use the normal
6080 plus and minus for the vector operation. */
6081 if (code
== POINTER_PLUS_EXPR
)
6083 if (code
== POINTER_DIFF_EXPR
)
6086 /* Support only unary or binary operations. */
6087 op_type
= TREE_CODE_LENGTH (code
);
6088 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6090 if (dump_enabled_p ())
6091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6092 "num. args = %d (not unary/binary/ternary op).\n",
6097 scalar_dest
= gimple_assign_lhs (stmt
);
6098 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6100 /* Most operations cannot handle bit-precision types without extra
6102 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6104 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6105 /* Exception are bitwise binary operations. */
6106 && code
!= BIT_IOR_EXPR
6107 && code
!= BIT_XOR_EXPR
6108 && code
!= BIT_AND_EXPR
)
6110 if (dump_enabled_p ())
6111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6112 "bit-precision arithmetic not supported.\n");
6117 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6118 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6122 "use not simple.\n");
6125 /* If op0 is an external or constant def, infer the vector type
6126 from the scalar type. */
6129 /* For boolean type we cannot determine vectype by
6130 invariant value (don't know whether it is a vector
6131 of booleans or vector of integers). We use output
6132 vectype because operations on boolean don't change
6134 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6136 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6140 "not supported operation on bool value.\n");
6143 vectype
= vectype_out
;
6146 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6150 gcc_assert (vectype
);
6153 if (dump_enabled_p ())
6154 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6155 "no vectype for scalar type %T\n",
6161 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6162 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6163 if (maybe_ne (nunits_out
, nunits_in
))
6166 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6167 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6168 if (op_type
== binary_op
|| op_type
== ternary_op
)
6170 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6171 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6173 if (dump_enabled_p ())
6174 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6175 "use not simple.\n");
6179 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6182 if (op_type
== ternary_op
)
6184 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6185 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6189 "use not simple.\n");
6193 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
6197 /* Multiple types in SLP are handled by creating the appropriate number of
6198 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6203 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6207 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6211 gcc_assert (ncopies
>= 1);
6213 /* Reject attempts to combine mask types with nonmask types, e.g. if
6214 we have an AND between a (nonmask) boolean loaded from memory and
6215 a (mask) boolean result of a comparison.
6217 TODO: We could easily fix these cases up using pattern statements. */
6218 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6219 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6220 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6224 "mixed mask and nonmask vector types\n");
6228 /* Supportable by target? */
6230 vec_mode
= TYPE_MODE (vectype
);
6231 if (code
== MULT_HIGHPART_EXPR
)
6232 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6235 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6238 if (dump_enabled_p ())
6239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6243 target_support_p
= (optab_handler (optab
, vec_mode
)
6244 != CODE_FOR_nothing
);
6247 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6248 if (!target_support_p
)
6250 if (dump_enabled_p ())
6251 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6252 "op not supported by target.\n");
6253 /* Check only during analysis. */
6254 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6255 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6257 if (dump_enabled_p ())
6258 dump_printf_loc (MSG_NOTE
, vect_location
,
6259 "proceeding using word mode.\n");
6260 using_emulated_vectors_p
= true;
6263 if (using_emulated_vectors_p
6264 && !vect_can_vectorize_without_simd_p (code
))
6266 if (dump_enabled_p ())
6267 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6271 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6272 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6273 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6275 if (!vec_stmt
) /* transformation not required. */
6277 /* If this operation is part of a reduction, a fully-masked loop
6278 should only change the active lanes of the reduction chain,
6279 keeping the inactive lanes as-is. */
6281 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6284 if (cond_fn
== IFN_LAST
6285 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6286 OPTIMIZE_FOR_SPEED
))
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6290 "can't use a fully-masked loop because no"
6291 " conditional operation is available.\n");
6292 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6295 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6299 /* Put types on constant and invariant SLP children. */
6301 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6302 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6303 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6307 "incompatible vector types for invariants\n");
6311 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6312 DUMP_VECT_SCOPE ("vectorizable_operation");
6313 vect_model_simple_cost (vinfo
, stmt_info
,
6314 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6315 if (using_emulated_vectors_p
)
6317 /* The above vect_model_simple_cost call handles constants
6318 in the prologue and (mis-)costs one of the stmts as
6319 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6320 for the actual lowering that will be applied. */
6322 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6336 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_NOTE
, vect_location
,
6345 "transform binary/unary operation.\n");
6347 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6349 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6350 vectors with unsigned elements, but the result is signed. So, we
6351 need to compute the MINUS_EXPR into vectype temporary and
6352 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6353 tree vec_cvt_dest
= NULL_TREE
;
6354 if (orig_code
== POINTER_DIFF_EXPR
)
6356 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6357 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6361 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6363 /* In case the vectorization factor (VF) is bigger than the number
6364 of elements that we can fit in a vectype (nunits), we have to generate
6365 more than one vector stmt - i.e - we need to "unroll" the
6366 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6367 from one copy of the vector stmt to the next, in the field
6368 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6369 stages to find the correct vector defs to be used when vectorizing
6370 stmts that use the defs of the current stmt. The example below
6371 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6372 we need to create 4 vectorized stmts):
6374 before vectorization:
6375 RELATED_STMT VEC_STMT
6379 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6381 RELATED_STMT VEC_STMT
6382 VS1_0: vx0 = memref0 VS1_1 -
6383 VS1_1: vx1 = memref1 VS1_2 -
6384 VS1_2: vx2 = memref2 VS1_3 -
6385 VS1_3: vx3 = memref3 - -
6386 S1: x = load - VS1_0
6389 step2: vectorize stmt S2 (done here):
6390 To vectorize stmt S2 we first need to find the relevant vector
6391 def for the first operand 'x'. This is, as usual, obtained from
6392 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6393 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6394 relevant vector def 'vx0'. Having found 'vx0' we can generate
6395 the vector stmt VS2_0, and as usual, record it in the
6396 STMT_VINFO_VEC_STMT of stmt S2.
6397 When creating the second copy (VS2_1), we obtain the relevant vector
6398 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6399 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6400 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6401 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6402 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6403 chain of stmts and pointers:
6404 RELATED_STMT VEC_STMT
6405 VS1_0: vx0 = memref0 VS1_1 -
6406 VS1_1: vx1 = memref1 VS1_2 -
6407 VS1_2: vx2 = memref2 VS1_3 -
6408 VS1_3: vx3 = memref3 - -
6409 S1: x = load - VS1_0
6410 VS2_0: vz0 = vx0 + v1 VS2_1 -
6411 VS2_1: vz1 = vx1 + v1 VS2_2 -
6412 VS2_2: vz2 = vx2 + v1 VS2_3 -
6413 VS2_3: vz3 = vx3 + v1 - -
6414 S2: z = x + 1 - VS2_0 */
6416 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6417 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6418 /* Arguments are ready. Create the new vector stmt. */
6419 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6421 gimple
*new_stmt
= NULL
;
6422 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6423 ? vec_oprnds1
[i
] : NULL_TREE
);
6424 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6425 if (masked_loop_p
&& reduc_idx
>= 0)
6427 /* Perform the operation on active elements only and take
6428 inactive elements from the reduction chain input. */
6430 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6431 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6433 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6435 new_temp
= make_ssa_name (vec_dest
, call
);
6436 gimple_call_set_lhs (call
, new_temp
);
6437 gimple_call_set_nothrow (call
, true);
6438 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6443 tree mask
= NULL_TREE
;
6444 /* When combining two masks check if either of them is elsewhere
6445 combined with a loop mask, if that's the case we can mark that the
6446 new combined mask doesn't need to be combined with a loop mask. */
6448 && code
== BIT_AND_EXPR
6449 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6451 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6454 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6457 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6461 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6464 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6467 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6472 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6473 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6474 gimple_assign_set_lhs (new_stmt
, new_temp
);
6475 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6476 if (using_emulated_vectors_p
)
6477 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
6479 /* Enter the combined value into the vector cond hash so we don't
6480 AND it with a loop mask again. */
6482 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6486 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6487 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6489 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6490 gimple_assign_set_lhs (new_stmt
, new_temp
);
6491 vect_finish_stmt_generation (vinfo
, stmt_info
,
6496 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6498 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6502 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6504 vec_oprnds0
.release ();
6505 vec_oprnds1
.release ();
6506 vec_oprnds2
.release ();
6511 /* A helper function to ensure data reference DR_INFO's base alignment. */
6514 ensure_base_align (dr_vec_info
*dr_info
)
6516 /* Alignment is only analyzed for the first element of a DR group,
6517 use that to look at base alignment we need to enforce. */
6518 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
6519 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
6521 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
6523 if (dr_info
->base_misaligned
)
6525 tree base_decl
= dr_info
->base_decl
;
6527 // We should only be able to increase the alignment of a base object if
6528 // we know what its new alignment should be at compile time.
6529 unsigned HOST_WIDE_INT align_base_to
=
6530 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6532 if (decl_in_symtab_p (base_decl
))
6533 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6534 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6536 SET_DECL_ALIGN (base_decl
, align_base_to
);
6537 DECL_USER_ALIGN (base_decl
) = 1;
6539 dr_info
->base_misaligned
= false;
6544 /* Function get_group_alias_ptr_type.
6546 Return the alias type for the group starting at FIRST_STMT_INFO. */
6549 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6551 struct data_reference
*first_dr
, *next_dr
;
6553 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6554 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6555 while (next_stmt_info
)
6557 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6558 if (get_alias_set (DR_REF (first_dr
))
6559 != get_alias_set (DR_REF (next_dr
)))
6561 if (dump_enabled_p ())
6562 dump_printf_loc (MSG_NOTE
, vect_location
,
6563 "conflicting alias set types.\n");
6564 return ptr_type_node
;
6566 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6568 return reference_alias_ptr_type (DR_REF (first_dr
));
6572 /* Function scan_operand_equal_p.
6574 Helper function for check_scan_store. Compare two references
6575 with .GOMP_SIMD_LANE bases. */
6578 scan_operand_equal_p (tree ref1
, tree ref2
)
6580 tree ref
[2] = { ref1
, ref2
};
6581 poly_int64 bitsize
[2], bitpos
[2];
6582 tree offset
[2], base
[2];
6583 for (int i
= 0; i
< 2; ++i
)
6586 int unsignedp
, reversep
, volatilep
= 0;
6587 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6588 &offset
[i
], &mode
, &unsignedp
,
6589 &reversep
, &volatilep
);
6590 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6592 if (TREE_CODE (base
[i
]) == MEM_REF
6593 && offset
[i
] == NULL_TREE
6594 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6596 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6597 if (is_gimple_assign (def_stmt
)
6598 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6599 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6600 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6602 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6604 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6605 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6610 if (!operand_equal_p (base
[0], base
[1], 0))
6612 if (maybe_ne (bitsize
[0], bitsize
[1]))
6614 if (offset
[0] != offset
[1])
6616 if (!offset
[0] || !offset
[1])
6618 if (!operand_equal_p (offset
[0], offset
[1], 0))
6621 for (int i
= 0; i
< 2; ++i
)
6623 step
[i
] = integer_one_node
;
6624 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6626 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6627 if (is_gimple_assign (def_stmt
)
6628 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6629 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6632 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6633 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6636 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6638 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6639 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6641 tree rhs1
= NULL_TREE
;
6642 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6644 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6645 if (gimple_assign_cast_p (def_stmt
))
6646 rhs1
= gimple_assign_rhs1 (def_stmt
);
6648 else if (CONVERT_EXPR_P (offset
[i
]))
6649 rhs1
= TREE_OPERAND (offset
[i
], 0);
6651 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6652 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6653 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6654 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6657 if (!operand_equal_p (offset
[0], offset
[1], 0)
6658 || !operand_equal_p (step
[0], step
[1], 0))
/* Kind of operation used for one step when lowering an OpenMP scan
   store into a sequence of vector permutations / shifts.  */

enum scan_store_kind
{
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};
6677 /* Function check_scan_store.
6679 Verify if we can perform the needed permutations or whole vector shifts.
6680 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6681 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6682 to do at each step. */
6685 scan_store_can_perm_p (tree vectype
, tree init
,
6686 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6688 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6689 unsigned HOST_WIDE_INT nunits
;
6690 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6692 int units_log2
= exact_log2 (nunits
);
6693 if (units_log2
<= 0)
6697 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6698 for (i
= 0; i
<= units_log2
; ++i
)
6700 unsigned HOST_WIDE_INT j
, k
;
6701 enum scan_store_kind kind
= scan_store_kind_perm
;
6702 vec_perm_builder
sel (nunits
, nunits
, 1);
6703 sel
.quick_grow (nunits
);
6704 if (i
== units_log2
)
6706 for (j
= 0; j
< nunits
; ++j
)
6707 sel
[j
] = nunits
- 1;
6711 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6713 for (k
= 0; j
< nunits
; ++j
, ++k
)
6714 sel
[j
] = nunits
+ k
;
6716 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6717 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
6719 if (i
== units_log2
)
6722 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6724 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6726 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6727 /* Whole vector shifts shift in zeros, so if init is all zero
6728 constant, there is no need to do anything further. */
6729 if ((TREE_CODE (init
) != INTEGER_CST
6730 && TREE_CODE (init
) != REAL_CST
)
6731 || !initializer_zerop (init
))
6733 tree masktype
= truth_type_for (vectype
);
6734 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6736 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6739 kind
= whole_vector_shift_kind
;
6741 if (use_whole_vector
)
6743 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6744 use_whole_vector
->safe_grow_cleared (i
, true);
6745 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6746 use_whole_vector
->safe_push (kind
);
6754 /* Function check_scan_store.
6756 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6759 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6760 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6761 vect_memory_access_type memory_access_type
)
6763 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6764 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6767 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6770 || memory_access_type
!= VMAT_CONTIGUOUS
6771 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6772 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6773 || loop_vinfo
== NULL
6774 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6775 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6776 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6777 || !integer_zerop (DR_INIT (dr_info
->dr
))
6778 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6779 || !alias_sets_conflict_p (get_alias_set (vectype
),
6780 get_alias_set (TREE_TYPE (ref_type
))))
6782 if (dump_enabled_p ())
6783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6784 "unsupported OpenMP scan store.\n");
6788 /* We need to pattern match code built by OpenMP lowering and simplified
6789 by following optimizations into something we can handle.
6790 #pragma omp simd reduction(inscan,+:r)
6794 #pragma omp scan inclusive (r)
6797 shall have body with:
6798 // Initialization for input phase, store the reduction initializer:
6799 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6800 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6802 // Actual input phase:
6804 r.0_5 = D.2042[_20];
6807 // Initialization for scan phase:
6808 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6814 // Actual scan phase:
6816 r.1_8 = D.2042[_20];
6818 The "omp simd array" variable D.2042 holds the privatized copy used
6819 inside of the loop and D.2043 is another one that holds copies of
6820 the current original list item. The separate GOMP_SIMD_LANE ifn
6821 kinds are there in order to allow optimizing the initializer store
6822 and combiner sequence, e.g. if it is originally some C++ish user
6823 defined reduction, but allow the vectorizer to pattern recognize it
6824 and turn into the appropriate vectorized scan.
6826 For exclusive scan, this is slightly different:
6827 #pragma omp simd reduction(inscan,+:r)
6831 #pragma omp scan exclusive (r)
6834 shall have body with:
6835 // Initialization for input phase, store the reduction initializer:
6836 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6837 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6839 // Actual input phase:
6841 r.0_5 = D.2042[_20];
6844 // Initialization for scan phase:
6845 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6851 // Actual scan phase:
6853 r.1_8 = D.2044[_20];
6856 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6858 /* Match the D.2042[_21] = 0; store above. Just require that
6859 it is a constant or external definition store. */
6860 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6863 if (dump_enabled_p ())
6864 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6865 "unsupported OpenMP scan initializer store.\n");
6869 if (! loop_vinfo
->scan_map
)
6870 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6871 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6872 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6875 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6877 /* These stores can be vectorized normally. */
6881 if (rhs_dt
!= vect_internal_def
)
6884 if (dump_enabled_p ())
6885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6886 "unsupported OpenMP scan combiner pattern.\n");
6890 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6891 tree rhs
= gimple_assign_rhs1 (stmt
);
6892 if (TREE_CODE (rhs
) != SSA_NAME
)
6895 gimple
*other_store_stmt
= NULL
;
6896 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6897 bool inscan_var_store
6898 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6902 if (!inscan_var_store
)
6904 use_operand_p use_p
;
6905 imm_use_iterator iter
;
6906 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6908 gimple
*use_stmt
= USE_STMT (use_p
);
6909 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6911 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6912 || !is_gimple_assign (use_stmt
)
6913 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6915 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6917 other_store_stmt
= use_stmt
;
6919 if (other_store_stmt
== NULL
)
6921 rhs
= gimple_assign_lhs (other_store_stmt
);
6922 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6926 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6928 use_operand_p use_p
;
6929 imm_use_iterator iter
;
6930 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6932 gimple
*use_stmt
= USE_STMT (use_p
);
6933 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6935 if (other_store_stmt
)
6937 other_store_stmt
= use_stmt
;
6943 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6944 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6945 || !is_gimple_assign (def_stmt
)
6946 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6949 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6950 /* For pointer addition, we should use the normal plus for the vector
6954 case POINTER_PLUS_EXPR
:
6957 case MULT_HIGHPART_EXPR
:
6962 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6965 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6966 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6967 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6970 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6971 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6972 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6973 || !gimple_assign_load_p (load1_stmt
)
6974 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6975 || !gimple_assign_load_p (load2_stmt
))
6978 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6979 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6980 if (load1_stmt_info
== NULL
6981 || load2_stmt_info
== NULL
6982 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6983 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6984 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6985 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6988 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6990 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6991 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6992 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6994 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6996 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7000 use_operand_p use_p
;
7001 imm_use_iterator iter
;
7002 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7004 gimple
*use_stmt
= USE_STMT (use_p
);
7005 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7007 if (other_store_stmt
)
7009 other_store_stmt
= use_stmt
;
7013 if (other_store_stmt
== NULL
)
7015 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7016 || !gimple_store_p (other_store_stmt
))
7019 stmt_vec_info other_store_stmt_info
7020 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7021 if (other_store_stmt_info
== NULL
7022 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7023 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7026 gimple
*stmt1
= stmt
;
7027 gimple
*stmt2
= other_store_stmt
;
7028 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7029 std::swap (stmt1
, stmt2
);
7030 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7031 gimple_assign_rhs1 (load2_stmt
)))
7033 std::swap (rhs1
, rhs2
);
7034 std::swap (load1_stmt
, load2_stmt
);
7035 std::swap (load1_stmt_info
, load2_stmt_info
);
7037 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7038 gimple_assign_rhs1 (load1_stmt
)))
7041 tree var3
= NULL_TREE
;
7042 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7043 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7044 gimple_assign_rhs1 (load2_stmt
)))
7046 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7048 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7049 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7050 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7052 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7053 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7054 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7055 || lookup_attribute ("omp simd inscan exclusive",
7056 DECL_ATTRIBUTES (var3
)))
7060 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7061 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7062 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7065 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7066 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7067 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7068 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7069 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7070 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7073 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7074 std::swap (var1
, var2
);
7076 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7078 if (!lookup_attribute ("omp simd inscan exclusive",
7079 DECL_ATTRIBUTES (var1
)))
7084 if (loop_vinfo
->scan_map
== NULL
)
7086 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7090 /* The IL is as expected, now check if we can actually vectorize it.
7097 should be vectorized as (where _40 is the vectorized rhs
7098 from the D.2042[_21] = 0; store):
7099 _30 = MEM <vector(8) int> [(int *)&D.2043];
7100 _31 = MEM <vector(8) int> [(int *)&D.2042];
7101 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7103 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7104 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7106 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7107 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7108 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7110 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7111 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7113 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7114 MEM <vector(8) int> [(int *)&D.2043] = _39;
7115 MEM <vector(8) int> [(int *)&D.2042] = _38;
7122 should be vectorized as (where _40 is the vectorized rhs
7123 from the D.2042[_21] = 0; store):
7124 _30 = MEM <vector(8) int> [(int *)&D.2043];
7125 _31 = MEM <vector(8) int> [(int *)&D.2042];
7126 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7127 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7129 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7130 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7131 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7133 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7134 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7135 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7137 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7138 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7141 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7142 MEM <vector(8) int> [(int *)&D.2044] = _39;
7143 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7144 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7145 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7146 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7149 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7150 if (units_log2
== -1)
7157 /* Function vectorizable_scan_store.
7159 Helper of vectorizable_score, arguments like on vectorizable_store.
7160 Handle only the transformation, checking is done in check_scan_store. */
7163 vectorizable_scan_store (vec_info
*vinfo
,
7164 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7165 gimple
**vec_stmt
, int ncopies
)
7167 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7168 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7169 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7170 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7172 if (dump_enabled_p ())
7173 dump_printf_loc (MSG_NOTE
, vect_location
,
7174 "transform scan store. ncopies = %d\n", ncopies
);
7176 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7177 tree rhs
= gimple_assign_rhs1 (stmt
);
7178 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7180 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7181 bool inscan_var_store
7182 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7184 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7186 use_operand_p use_p
;
7187 imm_use_iterator iter
;
7188 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7190 gimple
*use_stmt
= USE_STMT (use_p
);
7191 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7193 rhs
= gimple_assign_lhs (use_stmt
);
7198 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7199 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7200 if (code
== POINTER_PLUS_EXPR
)
7202 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7203 && commutative_tree_code (code
));
7204 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7205 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7206 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7207 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7208 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7209 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7210 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7211 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7212 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7213 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7214 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7216 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7218 std::swap (rhs1
, rhs2
);
7219 std::swap (var1
, var2
);
7220 std::swap (load1_dr_info
, load2_dr_info
);
7223 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7226 unsigned HOST_WIDE_INT nunits
;
7227 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7229 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7230 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7231 gcc_assert (units_log2
> 0);
7232 auto_vec
<tree
, 16> perms
;
7233 perms
.quick_grow (units_log2
+ 1);
7234 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7235 for (int i
= 0; i
<= units_log2
; ++i
)
7237 unsigned HOST_WIDE_INT j
, k
;
7238 vec_perm_builder
sel (nunits
, nunits
, 1);
7239 sel
.quick_grow (nunits
);
7240 if (i
== units_log2
)
7241 for (j
= 0; j
< nunits
; ++j
)
7242 sel
[j
] = nunits
- 1;
7245 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7247 for (k
= 0; j
< nunits
; ++j
, ++k
)
7248 sel
[j
] = nunits
+ k
;
7250 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7251 if (!use_whole_vector
.is_empty ()
7252 && use_whole_vector
[i
] != scan_store_kind_perm
)
7254 if (zero_vec
== NULL_TREE
)
7255 zero_vec
= build_zero_cst (vectype
);
7256 if (masktype
== NULL_TREE
7257 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7258 masktype
= truth_type_for (vectype
);
7259 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7262 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7265 tree vec_oprnd1
= NULL_TREE
;
7266 tree vec_oprnd2
= NULL_TREE
;
7267 tree vec_oprnd3
= NULL_TREE
;
7268 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7269 tree dataref_offset
= build_int_cst (ref_type
, 0);
7270 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7271 vectype
, VMAT_CONTIGUOUS
);
7272 tree ldataref_ptr
= NULL_TREE
;
7273 tree orig
= NULL_TREE
;
7274 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7275 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7276 auto_vec
<tree
> vec_oprnds1
;
7277 auto_vec
<tree
> vec_oprnds2
;
7278 auto_vec
<tree
> vec_oprnds3
;
7279 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7280 *init
, &vec_oprnds1
,
7281 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7282 rhs2
, &vec_oprnds3
);
7283 for (int j
= 0; j
< ncopies
; j
++)
7285 vec_oprnd1
= vec_oprnds1
[j
];
7286 if (ldataref_ptr
== NULL
)
7287 vec_oprnd2
= vec_oprnds2
[j
];
7288 vec_oprnd3
= vec_oprnds3
[j
];
7291 else if (!inscan_var_store
)
7292 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7296 vec_oprnd2
= make_ssa_name (vectype
);
7297 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7298 unshare_expr (ldataref_ptr
),
7300 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7301 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7302 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7303 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7304 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7307 tree v
= vec_oprnd2
;
7308 for (int i
= 0; i
< units_log2
; ++i
)
7310 tree new_temp
= make_ssa_name (vectype
);
7311 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7313 && (use_whole_vector
[i
]
7314 != scan_store_kind_perm
))
7315 ? zero_vec
: vec_oprnd1
, v
,
7317 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7318 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7319 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7321 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7323 /* Whole vector shift shifted in zero bits, but if *init
7324 is not initializer_zerop, we need to replace those elements
7325 with elements from vec_oprnd1. */
7326 tree_vector_builder
vb (masktype
, nunits
, 1);
7327 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7328 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7329 ? boolean_false_node
: boolean_true_node
);
7331 tree new_temp2
= make_ssa_name (vectype
);
7332 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7333 new_temp
, vec_oprnd1
);
7334 vect_finish_stmt_generation (vinfo
, stmt_info
,
7336 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7337 new_temp
= new_temp2
;
7340 /* For exclusive scan, perform the perms[i] permutation once
7343 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7351 tree new_temp2
= make_ssa_name (vectype
);
7352 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7353 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7354 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7359 tree new_temp
= make_ssa_name (vectype
);
7360 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7361 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7362 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7364 tree last_perm_arg
= new_temp
;
7365 /* For exclusive scan, new_temp computed above is the exclusive scan
7366 prefix sum. Turn it into inclusive prefix sum for the broadcast
7367 of the last element into orig. */
7368 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7370 last_perm_arg
= make_ssa_name (vectype
);
7371 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7372 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7373 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7376 orig
= make_ssa_name (vectype
);
7377 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7378 last_perm_arg
, perms
[units_log2
]);
7379 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7380 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7382 if (!inscan_var_store
)
7384 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7385 unshare_expr (dataref_ptr
),
7387 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7388 g
= gimple_build_assign (data_ref
, new_temp
);
7389 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7390 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7394 if (inscan_var_store
)
7395 for (int j
= 0; j
< ncopies
; j
++)
7398 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7400 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7401 unshare_expr (dataref_ptr
),
7403 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7404 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7405 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7406 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7412 /* Function vectorizable_store.
7414 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7415 that can be vectorized.
7416 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7417 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7418 Return true if STMT_INFO is vectorizable in this way. */
7421 vectorizable_store (vec_info
*vinfo
,
7422 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7423 gimple
**vec_stmt
, slp_tree slp_node
,
7424 stmt_vector_for_cost
*cost_vec
)
7428 tree vec_oprnd
= NULL_TREE
;
7430 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7431 class loop
*loop
= NULL
;
7432 machine_mode vec_mode
;
7434 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7435 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7436 tree dataref_ptr
= NULL_TREE
;
7437 tree dataref_offset
= NULL_TREE
;
7438 gimple
*ptr_incr
= NULL
;
7441 stmt_vec_info first_stmt_info
;
7443 unsigned int group_size
, i
;
7444 vec
<tree
> oprnds
= vNULL
;
7445 vec
<tree
> result_chain
= vNULL
;
7446 vec
<tree
> vec_oprnds
= vNULL
;
7447 bool slp
= (slp_node
!= NULL
);
7448 unsigned int vec_num
;
7449 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7451 gather_scatter_info gs_info
;
7453 vec_load_store_type vls_type
;
7456 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7459 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7463 /* Is vectorizable store? */
7465 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7466 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7468 tree scalar_dest
= gimple_assign_lhs (assign
);
7469 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7470 && is_pattern_stmt_p (stmt_info
))
7471 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7472 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7473 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7474 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7475 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7476 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7477 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7478 && TREE_CODE (scalar_dest
) != MEM_REF
)
7483 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7484 if (!call
|| !gimple_call_internal_p (call
))
7487 internal_fn ifn
= gimple_call_internal_fn (call
);
7488 if (!internal_store_fn_p (ifn
))
7491 if (slp_node
!= NULL
)
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7495 "SLP of masked stores not supported.\n");
7499 int mask_index
= internal_fn_mask_index (ifn
);
7501 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7502 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7506 op
= vect_get_store_rhs (stmt_info
);
7508 /* Cannot have hybrid store SLP -- that would mean storing to the
7509 same location twice. */
7510 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7512 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7513 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7517 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7518 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7523 /* Multiple types in SLP are handled by creating the appropriate number of
7524 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7529 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7531 gcc_assert (ncopies
>= 1);
7533 /* FORNOW. This restriction should be relaxed. */
7534 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7536 if (dump_enabled_p ())
7537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7538 "multiple types in nested loop.\n");
7542 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7543 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7546 elem_type
= TREE_TYPE (vectype
);
7547 vec_mode
= TYPE_MODE (vectype
);
7549 if (!STMT_VINFO_DATA_REF (stmt_info
))
7552 vect_memory_access_type memory_access_type
;
7553 enum dr_alignment_support alignment_support_scheme
;
7556 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7557 ncopies
, &memory_access_type
, &poffset
,
7558 &alignment_support_scheme
, &misalignment
, &gs_info
))
7563 if (memory_access_type
== VMAT_CONTIGUOUS
)
7565 if (!VECTOR_MODE_P (vec_mode
)
7566 || !can_vec_mask_load_store_p (vec_mode
,
7567 TYPE_MODE (mask_vectype
), false))
7570 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7571 && (memory_access_type
!= VMAT_GATHER_SCATTER
7572 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7574 if (dump_enabled_p ())
7575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7576 "unsupported access type for masked store.\n");
7582 /* FORNOW. In some cases can vectorize even if data-type not supported
7583 (e.g. - array initialization with 0). */
7584 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7588 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7589 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7590 && memory_access_type
!= VMAT_GATHER_SCATTER
7591 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7594 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7595 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7596 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7600 first_stmt_info
= stmt_info
;
7601 first_dr_info
= dr_info
;
7602 group_size
= vec_num
= 1;
7605 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7607 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7608 memory_access_type
))
7612 if (!vec_stmt
) /* transformation not required. */
7614 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7617 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7618 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
7619 vls_type
, group_size
,
7620 memory_access_type
, &gs_info
,
7624 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7627 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7629 "incompatible vector types for invariants\n");
7633 if (dump_enabled_p ()
7634 && memory_access_type
!= VMAT_ELEMENTWISE
7635 && memory_access_type
!= VMAT_GATHER_SCATTER
7636 && alignment_support_scheme
!= dr_aligned
)
7637 dump_printf_loc (MSG_NOTE
, vect_location
,
7638 "Vectorizing an unaligned access.\n");
7640 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7641 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7642 memory_access_type
, alignment_support_scheme
,
7643 misalignment
, vls_type
, slp_node
, cost_vec
);
7646 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7650 ensure_base_align (dr_info
);
7652 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7654 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7655 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7656 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7657 tree ptr
, var
, scale
, vec_mask
;
7658 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7659 tree mask_halfvectype
= mask_vectype
;
7660 edge pe
= loop_preheader_edge (loop
);
7663 enum { NARROW
, NONE
, WIDEN
} modifier
;
7664 poly_uint64 scatter_off_nunits
7665 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7667 if (known_eq (nunits
, scatter_off_nunits
))
7669 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7673 /* Currently gathers and scatters are only supported for
7674 fixed-length vectors. */
7675 unsigned int count
= scatter_off_nunits
.to_constant ();
7676 vec_perm_builder
sel (count
, count
, 1);
7677 for (i
= 0; i
< (unsigned int) count
; ++i
)
7678 sel
.quick_push (i
| (count
/ 2));
7680 vec_perm_indices
indices (sel
, 1, count
);
7681 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7683 gcc_assert (perm_mask
!= NULL_TREE
);
7685 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7689 /* Currently gathers and scatters are only supported for
7690 fixed-length vectors. */
7691 unsigned int count
= nunits
.to_constant ();
7692 vec_perm_builder
sel (count
, count
, 1);
7693 for (i
= 0; i
< (unsigned int) count
; ++i
)
7694 sel
.quick_push (i
| (count
/ 2));
7696 vec_perm_indices
indices (sel
, 2, count
);
7697 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7698 gcc_assert (perm_mask
!= NULL_TREE
);
7702 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7707 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7708 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7709 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7710 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7711 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7712 scaletype
= TREE_VALUE (arglist
);
7714 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7715 && TREE_CODE (rettype
) == VOID_TYPE
);
7717 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7718 if (!is_gimple_min_invariant (ptr
))
7720 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7721 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7722 gcc_assert (!new_bb
);
7725 if (mask
== NULL_TREE
)
7727 mask_arg
= build_int_cst (masktype
, -1);
7728 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7729 mask_arg
, masktype
, NULL
);
7732 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7734 auto_vec
<tree
> vec_oprnds0
;
7735 auto_vec
<tree
> vec_oprnds1
;
7736 auto_vec
<tree
> vec_masks
;
7739 tree mask_vectype
= truth_type_for (vectype
);
7740 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7742 ? ncopies
/ 2 : ncopies
,
7743 mask
, &vec_masks
, mask_vectype
);
7745 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7747 ? ncopies
/ 2 : ncopies
,
7748 gs_info
.offset
, &vec_oprnds0
);
7749 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7751 ? ncopies
/ 2 : ncopies
,
7753 for (j
= 0; j
< ncopies
; ++j
)
7755 if (modifier
== WIDEN
)
7758 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7759 perm_mask
, stmt_info
, gsi
);
7761 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7762 src
= vec_oprnd1
= vec_oprnds1
[j
];
7764 mask_op
= vec_mask
= vec_masks
[j
];
7766 else if (modifier
== NARROW
)
7769 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7770 perm_mask
, stmt_info
, gsi
);
7772 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7773 op
= vec_oprnd0
= vec_oprnds0
[j
];
7775 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7779 op
= vec_oprnd0
= vec_oprnds0
[j
];
7780 src
= vec_oprnd1
= vec_oprnds1
[j
];
7782 mask_op
= vec_mask
= vec_masks
[j
];
7785 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7787 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7788 TYPE_VECTOR_SUBPARTS (srctype
)));
7789 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7790 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7792 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7793 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7797 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7799 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7800 TYPE_VECTOR_SUBPARTS (idxtype
)));
7801 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7802 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7804 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7805 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7813 if (modifier
== NARROW
)
7815 var
= vect_get_new_ssa_name (mask_halfvectype
,
7818 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7819 : VEC_UNPACK_LO_EXPR
,
7821 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7824 tree optype
= TREE_TYPE (mask_arg
);
7825 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7828 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7829 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7830 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7832 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7833 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7835 if (!useless_type_conversion_p (masktype
, utype
))
7837 gcc_assert (TYPE_PRECISION (utype
)
7838 <= TYPE_PRECISION (masktype
));
7839 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7840 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7841 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7847 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7848 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7850 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7852 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7855 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7856 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7858 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7859 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7864 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7866 /* We vectorize all the stmts of the interleaving group when we
7867 reach the last stmt in the group. */
7868 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7869 < DR_GROUP_SIZE (first_stmt_info
)
7878 grouped_store
= false;
7879 /* VEC_NUM is the number of vect stmts to be created for this
7881 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7882 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7883 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7884 == first_stmt_info
);
7885 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7886 op
= vect_get_store_rhs (first_stmt_info
);
7889 /* VEC_NUM is the number of vect stmts to be created for this
7891 vec_num
= group_size
;
7893 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7896 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7898 if (dump_enabled_p ())
7899 dump_printf_loc (MSG_NOTE
, vect_location
,
7900 "transform store. ncopies = %d\n", ncopies
);
7902 if (memory_access_type
== VMAT_ELEMENTWISE
7903 || memory_access_type
== VMAT_STRIDED_SLP
)
7905 gimple_stmt_iterator incr_gsi
;
7911 tree stride_base
, stride_step
, alias_off
;
7915 /* Checked by get_load_store_type. */
7916 unsigned int const_nunits
= nunits
.to_constant ();
7918 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7919 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7921 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7923 = fold_build_pointer_plus
7924 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7925 size_binop (PLUS_EXPR
,
7926 convert_to_ptrofftype (dr_offset
),
7927 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7928 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7930 /* For a store with loop-invariant (but other than power-of-2)
7931 stride (i.e. not a grouped access) like so:
7933 for (i = 0; i < n; i += stride)
7936 we generate a new induction variable and new stores from
7937 the components of the (vectorized) rhs:
7939 for (j = 0; ; j += VF*stride)
7944 array[j + stride] = tmp2;
7948 unsigned nstores
= const_nunits
;
7950 tree ltype
= elem_type
;
7951 tree lvectype
= vectype
;
7954 if (group_size
< const_nunits
7955 && const_nunits
% group_size
== 0)
7957 nstores
= const_nunits
/ group_size
;
7959 ltype
= build_vector_type (elem_type
, group_size
);
7962 /* First check if vec_extract optab doesn't support extraction
7963 of vector elts directly. */
7964 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7966 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7967 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7968 group_size
).exists (&vmode
)
7969 || (convert_optab_handler (vec_extract_optab
,
7970 TYPE_MODE (vectype
), vmode
)
7971 == CODE_FOR_nothing
))
7973 /* Try to avoid emitting an extract of vector elements
7974 by performing the extracts using an integer type of the
7975 same size, extracting from a vector of those and then
7976 re-interpreting it as the original vector type if
7979 = group_size
* GET_MODE_BITSIZE (elmode
);
7980 unsigned int lnunits
= const_nunits
/ group_size
;
7981 /* If we can't construct such a vector fall back to
7982 element extracts from the original vector type and
7983 element size stores. */
7984 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7985 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7986 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7987 lnunits
).exists (&vmode
)
7988 && (convert_optab_handler (vec_extract_optab
,
7990 != CODE_FOR_nothing
))
7994 ltype
= build_nonstandard_integer_type (lsize
, 1);
7995 lvectype
= build_vector_type (ltype
, nstores
);
7997 /* Else fall back to vector extraction anyway.
7998 Fewer stores are more important than avoiding spilling
7999 of the vector we extract from. Compared to the
8000 construction case in vectorizable_load no store-forwarding
8001 issue exists here for reasonable archs. */
8004 else if (group_size
>= const_nunits
8005 && group_size
% const_nunits
== 0)
8008 lnel
= const_nunits
;
8012 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8013 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8016 ivstep
= stride_step
;
8017 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8018 build_int_cst (TREE_TYPE (ivstep
), vf
));
8020 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8022 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8023 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8024 create_iv (stride_base
, ivstep
, NULL
,
8025 loop
, &incr_gsi
, insert_after
,
8027 incr
= gsi_stmt (incr_gsi
);
8029 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8031 alias_off
= build_int_cst (ref_type
, 0);
8032 stmt_vec_info next_stmt_info
= first_stmt_info
;
8033 for (g
= 0; g
< group_size
; g
++)
8035 running_off
= offvar
;
8038 tree size
= TYPE_SIZE_UNIT (ltype
);
8039 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8041 tree newoff
= copy_ssa_name (running_off
, NULL
);
8042 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8044 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8045 running_off
= newoff
;
8048 op
= vect_get_store_rhs (next_stmt_info
);
8049 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8051 unsigned int group_el
= 0;
8052 unsigned HOST_WIDE_INT
8053 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8054 for (j
= 0; j
< ncopies
; j
++)
8056 vec_oprnd
= vec_oprnds
[j
];
8057 /* Pun the vector to extract from if necessary. */
8058 if (lvectype
!= vectype
)
8060 tree tem
= make_ssa_name (lvectype
);
8062 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8063 lvectype
, vec_oprnd
));
8064 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8067 for (i
= 0; i
< nstores
; i
++)
8069 tree newref
, newoff
;
8070 gimple
*incr
, *assign
;
8071 tree size
= TYPE_SIZE (ltype
);
8072 /* Extract the i'th component. */
8073 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8074 bitsize_int (i
), size
);
8075 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8078 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8082 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8084 newref
= build2 (MEM_REF
, ltype
,
8085 running_off
, this_off
);
8086 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8088 /* And store it to *running_off. */
8089 assign
= gimple_build_assign (newref
, elem
);
8090 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8094 || group_el
== group_size
)
8096 newoff
= copy_ssa_name (running_off
, NULL
);
8097 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8098 running_off
, stride_step
);
8099 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8101 running_off
= newoff
;
8104 if (g
== group_size
- 1
8107 if (j
== 0 && i
== 0)
8109 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8113 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8114 vec_oprnds
.release ();
8122 auto_vec
<tree
> dr_chain (group_size
);
8123 oprnds
.create (group_size
);
8125 gcc_assert (alignment_support_scheme
);
8126 vec_loop_masks
*loop_masks
8127 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8128 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8130 vec_loop_lens
*loop_lens
8131 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8132 ? &LOOP_VINFO_LENS (loop_vinfo
)
8135 /* Shouldn't go with length-based approach if fully masked. */
8136 gcc_assert (!loop_lens
|| !loop_masks
);
8138 /* Targets with store-lane instructions must not require explicit
8139 realignment. vect_supportable_dr_alignment always returns either
8140 dr_aligned or dr_unaligned_supported for masked operations. */
8141 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8144 || alignment_support_scheme
== dr_aligned
8145 || alignment_support_scheme
== dr_unaligned_supported
);
8147 tree offset
= NULL_TREE
;
8148 if (!known_eq (poffset
, 0))
8149 offset
= size_int (poffset
);
8152 tree vec_offset
= NULL_TREE
;
8153 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8155 aggr_type
= NULL_TREE
;
8158 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8160 aggr_type
= elem_type
;
8161 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8162 &bump
, &vec_offset
);
8166 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8167 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8169 aggr_type
= vectype
;
8170 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8171 memory_access_type
);
8175 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8177 /* In case the vectorization factor (VF) is bigger than the number
8178 of elements that we can fit in a vectype (nunits), we have to generate
8179 more than one vector stmt - i.e - we need to "unroll" the
8180 vector stmt by a factor VF/nunits. */
8182 /* In case of interleaving (non-unit grouped access):
8189 We create vectorized stores starting from base address (the access of the
8190 first stmt in the chain (S2 in the above example), when the last store stmt
8191 of the chain (S4) is reached:
8194 VS2: &base + vec_size*1 = vx0
8195 VS3: &base + vec_size*2 = vx1
8196 VS4: &base + vec_size*3 = vx3
8198 Then permutation statements are generated:
8200 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8201 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8204 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8205 (the order of the data-refs in the output of vect_permute_store_chain
8206 corresponds to the order of scalar stmts in the interleaving chain - see
8207 the documentation of vect_permute_store_chain()).
8209 In case of both multiple types and interleaving, above vector stores and
8210 permutation stmts are created for every copy. The result vector stmts are
8211 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8212 STMT_VINFO_RELATED_STMT for the next copies.
8215 auto_vec
<tree
> vec_masks
;
8216 tree vec_mask
= NULL
;
8217 auto_vec
<tree
> vec_offsets
;
8218 auto_vec
<vec
<tree
> > gvec_oprnds
;
8219 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8220 for (j
= 0; j
< ncopies
; j
++)
8227 /* Get vectorized arguments for SLP_NODE. */
8228 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8230 vec_oprnd
= vec_oprnds
[0];
8234 /* For interleaved stores we collect vectorized defs for all the
8235 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8236 used as an input to vect_permute_store_chain().
8238 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8239 and OPRNDS are of size 1. */
8240 stmt_vec_info next_stmt_info
= first_stmt_info
;
8241 for (i
= 0; i
< group_size
; i
++)
8243 /* Since gaps are not supported for interleaved stores,
8244 DR_GROUP_SIZE is the exact number of stmts in the chain.
8245 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8246 that there is no interleaving, DR_GROUP_SIZE is 1,
8247 and only one iteration of the loop will be executed. */
8248 op
= vect_get_store_rhs (next_stmt_info
);
8249 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8250 ncopies
, op
, &gvec_oprnds
[i
]);
8251 vec_oprnd
= gvec_oprnds
[i
][0];
8252 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8253 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8254 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8258 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8259 mask
, &vec_masks
, mask_vectype
);
8260 vec_mask
= vec_masks
[0];
8264 /* We should have catched mismatched types earlier. */
8265 gcc_assert (useless_type_conversion_p (vectype
,
8266 TREE_TYPE (vec_oprnd
)));
8267 bool simd_lane_access_p
8268 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8269 if (simd_lane_access_p
8271 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8272 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8273 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8274 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8275 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8276 get_alias_set (TREE_TYPE (ref_type
))))
8278 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8279 dataref_offset
= build_int_cst (ref_type
, 0);
8281 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8283 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8284 slp_node
, &gs_info
, &dataref_ptr
,
8286 vec_offset
= vec_offsets
[0];
8290 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8291 simd_lane_access_p
? loop
: NULL
,
8292 offset
, &dummy
, gsi
, &ptr_incr
,
8293 simd_lane_access_p
, bump
);
8297 /* For interleaved stores we created vectorized defs for all the
8298 defs stored in OPRNDS in the previous iteration (previous copy).
8299 DR_CHAIN is then used as an input to vect_permute_store_chain().
8300 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8301 OPRNDS are of size 1. */
8302 for (i
= 0; i
< group_size
; i
++)
8304 vec_oprnd
= gvec_oprnds
[i
][j
];
8305 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8306 oprnds
[i
] = gvec_oprnds
[i
][j
];
8309 vec_mask
= vec_masks
[j
];
8312 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8313 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8314 vec_offset
= vec_offsets
[j
];
8316 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8320 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8324 /* Get an array into which we can store the individual vectors. */
8325 vec_array
= create_vector_array (vectype
, vec_num
);
8327 /* Invalidate the current contents of VEC_ARRAY. This should
8328 become an RTL clobber too, which prevents the vector registers
8329 from being upward-exposed. */
8330 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8332 /* Store the individual vectors into the array. */
8333 for (i
= 0; i
< vec_num
; i
++)
8335 vec_oprnd
= dr_chain
[i
];
8336 write_vector_array (vinfo
, stmt_info
,
8337 gsi
, vec_oprnd
, vec_array
, i
);
8340 tree final_mask
= NULL
;
8342 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8345 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8346 final_mask
, vec_mask
, gsi
);
8352 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8354 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8355 tree alias_ptr
= build_int_cst (ref_type
, align
);
8356 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8357 dataref_ptr
, alias_ptr
,
8358 final_mask
, vec_array
);
8363 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8364 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8365 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8367 gimple_call_set_lhs (call
, data_ref
);
8369 gimple_call_set_nothrow (call
, true);
8370 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8373 /* Record that VEC_ARRAY is now dead. */
8374 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8382 result_chain
.create (group_size
);
8384 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8385 gsi
, &result_chain
);
8388 stmt_vec_info next_stmt_info
= first_stmt_info
;
8389 for (i
= 0; i
< vec_num
; i
++)
8392 unsigned HOST_WIDE_INT align
;
8394 tree final_mask
= NULL_TREE
;
8396 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8398 vectype
, vec_num
* j
+ i
);
8400 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8401 final_mask
, vec_mask
, gsi
);
8403 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8405 tree scale
= size_int (gs_info
.scale
);
8408 call
= gimple_build_call_internal
8409 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8410 scale
, vec_oprnd
, final_mask
);
8412 call
= gimple_build_call_internal
8413 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8415 gimple_call_set_nothrow (call
, true);
8416 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8422 /* Bump the vector pointer. */
8423 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8424 gsi
, stmt_info
, bump
);
8427 vec_oprnd
= vec_oprnds
[i
];
8428 else if (grouped_store
)
8429 /* For grouped stores vectorized defs are interleaved in
8430 vect_permute_store_chain(). */
8431 vec_oprnd
= result_chain
[i
];
8433 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8434 if (alignment_support_scheme
== dr_aligned
)
8436 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8438 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8442 misalign
= misalignment
;
8443 if (dataref_offset
== NULL_TREE
8444 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8445 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8447 align
= least_bit_hwi (misalign
| align
);
8449 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8451 tree perm_mask
= perm_mask_for_reverse (vectype
);
8452 tree perm_dest
= vect_create_destination_var
8453 (vect_get_store_rhs (stmt_info
), vectype
);
8454 tree new_temp
= make_ssa_name (perm_dest
);
8456 /* Generate the permute statement. */
8458 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8459 vec_oprnd
, perm_mask
);
8460 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8462 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8463 vec_oprnd
= new_temp
;
8466 /* Arguments are ready. Create the new vector stmt. */
8469 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8471 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8473 final_mask
, vec_oprnd
);
8474 gimple_call_set_nothrow (call
, true);
8475 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8481 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8482 vec_num
* ncopies
, vec_num
* j
+ i
);
8483 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8484 machine_mode vmode
= TYPE_MODE (vectype
);
8485 opt_machine_mode new_ovmode
8486 = get_len_load_store_mode (vmode
, false);
8487 machine_mode new_vmode
= new_ovmode
.require ();
8488 /* Need conversion if it's wrapped with VnQI. */
8489 if (vmode
!= new_vmode
)
8492 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8495 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8497 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8499 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8501 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8506 signed char biasval
=
8507 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8509 tree bias
= build_int_cst (intQI_type_node
, biasval
);
8511 = gimple_build_call_internal (IFN_LEN_STORE
, 5, dataref_ptr
,
8512 ptr
, final_len
, vec_oprnd
,
8514 gimple_call_set_nothrow (call
, true);
8515 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8520 data_ref
= fold_build2 (MEM_REF
, vectype
,
8524 : build_int_cst (ref_type
, 0));
8525 if (alignment_support_scheme
== dr_aligned
)
8528 TREE_TYPE (data_ref
)
8529 = build_aligned_type (TREE_TYPE (data_ref
),
8530 align
* BITS_PER_UNIT
);
8531 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8532 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8533 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8539 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8540 if (!next_stmt_info
)
8547 *vec_stmt
= new_stmt
;
8548 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8552 for (i
= 0; i
< group_size
; ++i
)
8554 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8558 result_chain
.release ();
8559 vec_oprnds
.release ();
8564 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8565 VECTOR_CST mask. No checks are made that the target platform supports the
8566 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8567 vect_gen_perm_mask_checked. */
8570 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8574 poly_uint64 nunits
= sel
.length ();
8575 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8577 mask_type
= build_vector_type (ssizetype
, nunits
);
8578 return vec_perm_indices_to_tree (mask_type
, sel
);
8581 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8582 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8585 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8587 machine_mode vmode
= TYPE_MODE (vectype
);
8588 gcc_assert (can_vec_perm_const_p (vmode
, vmode
, sel
));
8589 return vect_gen_perm_mask_any (vectype
, sel
);
8592 /* Given a vector variable X and Y, that was generated for the scalar
8593 STMT_INFO, generate instructions to permute the vector elements of X and Y
8594 using permutation mask MASK_VEC, insert them at *GSI and return the
8595 permuted vector variable. */
8598 permute_vec_elements (vec_info
*vinfo
,
8599 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8600 gimple_stmt_iterator
*gsi
)
8602 tree vectype
= TREE_TYPE (x
);
8603 tree perm_dest
, data_ref
;
8606 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8607 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8608 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8610 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8611 data_ref
= make_ssa_name (perm_dest
);
8613 /* Generate the permute statement. */
8614 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8615 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8620 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8621 inserting them on the loops preheader edge. Returns true if we
8622 were successful in doing so (and thus STMT_INFO can be moved then),
8623 otherwise returns false. */
8626 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8632 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8634 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8635 if (!gimple_nop_p (def_stmt
)
8636 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8638 /* Make sure we don't need to recurse. While we could do
8639 so in simple cases when there are more complex use webs
8640 we don't have an easy way to preserve stmt order to fulfil
8641 dependencies within them. */
8644 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8646 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8648 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8649 if (!gimple_nop_p (def_stmt2
)
8650 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8660 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8662 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8663 if (!gimple_nop_p (def_stmt
)
8664 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8666 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8667 gsi_remove (&gsi
, false);
8668 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8675 /* vectorizable_load.
8677 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8678 that can be vectorized.
8679 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8680 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8681 Return true if STMT_INFO is vectorizable in this way. */
8684 vectorizable_load (vec_info
*vinfo
,
8685 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8686 gimple
**vec_stmt
, slp_tree slp_node
,
8687 stmt_vector_for_cost
*cost_vec
)
8690 tree vec_dest
= NULL
;
8691 tree data_ref
= NULL
;
8692 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8693 class loop
*loop
= NULL
;
8694 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8695 bool nested_in_vect_loop
= false;
8700 tree dataref_ptr
= NULL_TREE
;
8701 tree dataref_offset
= NULL_TREE
;
8702 gimple
*ptr_incr
= NULL
;
8705 unsigned int group_size
;
8706 poly_uint64 group_gap_adj
;
8707 tree msq
= NULL_TREE
, lsq
;
8708 tree realignment_token
= NULL_TREE
;
8710 vec
<tree
> dr_chain
= vNULL
;
8711 bool grouped_load
= false;
8712 stmt_vec_info first_stmt_info
;
8713 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8714 bool compute_in_loop
= false;
8715 class loop
*at_loop
;
8717 bool slp
= (slp_node
!= NULL
);
8718 bool slp_perm
= false;
8719 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8722 gather_scatter_info gs_info
;
8724 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8726 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8729 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8733 if (!STMT_VINFO_DATA_REF (stmt_info
))
8736 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8737 int mask_index
= -1;
8738 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8740 scalar_dest
= gimple_assign_lhs (assign
);
8741 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8744 tree_code code
= gimple_assign_rhs_code (assign
);
8745 if (code
!= ARRAY_REF
8746 && code
!= BIT_FIELD_REF
8747 && code
!= INDIRECT_REF
8748 && code
!= COMPONENT_REF
8749 && code
!= IMAGPART_EXPR
8750 && code
!= REALPART_EXPR
8752 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8757 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8758 if (!call
|| !gimple_call_internal_p (call
))
8761 internal_fn ifn
= gimple_call_internal_fn (call
);
8762 if (!internal_load_fn_p (ifn
))
8765 scalar_dest
= gimple_call_lhs (call
);
8769 mask_index
= internal_fn_mask_index (ifn
);
8770 /* ??? For SLP the mask operand is always last. */
8771 if (mask_index
>= 0 && slp_node
)
8772 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
8774 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8775 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8779 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8780 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8784 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8785 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8786 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8791 /* Multiple types in SLP are handled by creating the appropriate number of
8792 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8797 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8799 gcc_assert (ncopies
>= 1);
8801 /* FORNOW. This restriction should be relaxed. */
8802 if (nested_in_vect_loop
&& ncopies
> 1)
8804 if (dump_enabled_p ())
8805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8806 "multiple types in nested loop.\n");
8810 /* Invalidate assumptions made by dependence analysis when vectorization
8811 on the unrolled body effectively re-orders stmts. */
8813 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8814 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8815 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8817 if (dump_enabled_p ())
8818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8819 "cannot perform implicit CSE when unrolling "
8820 "with negative dependence distance\n");
8824 elem_type
= TREE_TYPE (vectype
);
8825 mode
= TYPE_MODE (vectype
);
8827 /* FORNOW. In some cases can vectorize even if data-type not supported
8828 (e.g. - data copies). */
8829 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8831 if (dump_enabled_p ())
8832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8833 "Aligned load, but unsupported type.\n");
8837 /* Check if the load is a part of an interleaving chain. */
8838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8840 grouped_load
= true;
8842 gcc_assert (!nested_in_vect_loop
);
8843 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8845 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8846 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8848 /* Refuse non-SLP vectorization of SLP-only groups. */
8849 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8851 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8853 "cannot vectorize load in non-SLP mode.\n");
8857 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8863 /* In BB vectorization we may not actually use a loaded vector
8864 accessing elements in excess of DR_GROUP_SIZE. */
8865 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8866 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8867 unsigned HOST_WIDE_INT nunits
;
8868 unsigned j
, k
, maxk
= 0;
8869 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8872 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8873 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8874 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8876 if (dump_enabled_p ())
8877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8878 "BB vectorization with gaps at the end of "
8879 "a load is not supported\n");
8886 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8889 if (dump_enabled_p ())
8890 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8892 "unsupported load permutation\n");
8897 /* Invalidate assumptions made by dependence analysis when vectorization
8898 on the unrolled body effectively re-orders stmts. */
8899 if (!PURE_SLP_STMT (stmt_info
)
8900 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8901 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8902 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8904 if (dump_enabled_p ())
8905 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8906 "cannot perform implicit CSE when performing "
8907 "group loads with negative dependence distance\n");
8914 vect_memory_access_type memory_access_type
;
8915 enum dr_alignment_support alignment_support_scheme
;
8918 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8919 ncopies
, &memory_access_type
, &poffset
,
8920 &alignment_support_scheme
, &misalignment
, &gs_info
))
8925 if (memory_access_type
== VMAT_CONTIGUOUS
)
8927 machine_mode vec_mode
= TYPE_MODE (vectype
);
8928 if (!VECTOR_MODE_P (vec_mode
)
8929 || !can_vec_mask_load_store_p (vec_mode
,
8930 TYPE_MODE (mask_vectype
), true))
8933 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8934 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8936 if (dump_enabled_p ())
8937 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8938 "unsupported access type for masked load.\n");
8941 else if (memory_access_type
== VMAT_GATHER_SCATTER
8942 && gs_info
.ifn
== IFN_LAST
8945 if (dump_enabled_p ())
8946 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8947 "unsupported masked emulated gather.\n");
8952 if (!vec_stmt
) /* transformation not required. */
8956 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8959 if (dump_enabled_p ())
8960 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8961 "incompatible vector types for invariants\n");
8966 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8969 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8970 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8971 VLS_LOAD
, group_size
,
8972 memory_access_type
, &gs_info
,
8975 if (dump_enabled_p ()
8976 && memory_access_type
!= VMAT_ELEMENTWISE
8977 && memory_access_type
!= VMAT_GATHER_SCATTER
8978 && alignment_support_scheme
!= dr_aligned
)
8979 dump_printf_loc (MSG_NOTE
, vect_location
,
8980 "Vectorizing an unaligned access.\n");
8982 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8983 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8984 alignment_support_scheme
, misalignment
,
8985 &gs_info
, slp_node
, cost_vec
);
8990 gcc_assert (memory_access_type
8991 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8993 if (dump_enabled_p ())
8994 dump_printf_loc (MSG_NOTE
, vect_location
,
8995 "transform load. ncopies = %d\n", ncopies
);
8999 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
9000 ensure_base_align (dr_info
);
9002 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
9004 vect_build_gather_load_calls (vinfo
,
9005 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
9009 if (memory_access_type
== VMAT_INVARIANT
)
9011 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
9012 /* If we have versioned for aliasing or the loop doesn't
9013 have any data dependencies that would preclude this,
9014 then we are sure this is a loop invariant load and
9015 thus we can insert it on the preheader edge. */
9016 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
9017 && !nested_in_vect_loop
9018 && hoist_defs_of_uses (stmt_info
, loop
));
9021 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
9022 if (dump_enabled_p ())
9023 dump_printf_loc (MSG_NOTE
, vect_location
,
9024 "hoisting out of the vectorized loop: %G", stmt
);
9025 scalar_dest
= copy_ssa_name (scalar_dest
);
9026 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
9027 gsi_insert_on_edge_immediate
9028 (loop_preheader_edge (loop
),
9029 gimple_build_assign (scalar_dest
, rhs
));
9031 /* These copies are all equivalent, but currently the representation
9032 requires a separate STMT_VINFO_VEC_STMT for each one. */
9033 gimple_stmt_iterator gsi2
= *gsi
;
9035 for (j
= 0; j
< ncopies
; j
++)
9038 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9041 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9043 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9045 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9049 *vec_stmt
= new_stmt
;
9050 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9056 if (memory_access_type
== VMAT_ELEMENTWISE
9057 || memory_access_type
== VMAT_STRIDED_SLP
)
9059 gimple_stmt_iterator incr_gsi
;
9064 vec
<constructor_elt
, va_gc
> *v
= NULL
;
9065 tree stride_base
, stride_step
, alias_off
;
9066 /* Checked by get_load_store_type. */
9067 unsigned int const_nunits
= nunits
.to_constant ();
9068 unsigned HOST_WIDE_INT cst_offset
= 0;
9071 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
9072 gcc_assert (!nested_in_vect_loop
);
9076 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9077 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9081 first_stmt_info
= stmt_info
;
9082 first_dr_info
= dr_info
;
9084 if (slp
&& grouped_load
)
9086 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9087 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9093 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9094 * vect_get_place_in_interleaving_chain (stmt_info
,
9097 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9100 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9102 = fold_build_pointer_plus
9103 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9104 size_binop (PLUS_EXPR
,
9105 convert_to_ptrofftype (dr_offset
),
9106 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9107 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
9109 /* For a load with loop-invariant (but other than power-of-2)
9110 stride (i.e. not a grouped access) like so:
9112 for (i = 0; i < n; i += stride)
9115 we generate a new induction variable and new accesses to
9116 form a new vector (or vectors, depending on ncopies):
9118 for (j = 0; ; j += VF*stride)
9120 tmp2 = array[j + stride];
9122 vectemp = {tmp1, tmp2, ...}
9125 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9126 build_int_cst (TREE_TYPE (stride_step
), vf
));
9128 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9130 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9131 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9132 create_iv (stride_base
, ivstep
, NULL
,
9133 loop
, &incr_gsi
, insert_after
,
9136 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9138 running_off
= offvar
;
9139 alias_off
= build_int_cst (ref_type
, 0);
9140 int nloads
= const_nunits
;
9142 tree ltype
= TREE_TYPE (vectype
);
9143 tree lvectype
= vectype
;
9144 auto_vec
<tree
> dr_chain
;
9145 if (memory_access_type
== VMAT_STRIDED_SLP
)
9147 if (group_size
< const_nunits
)
9149 /* First check if vec_init optab supports construction from vector
9150 elts directly. Otherwise avoid emitting a constructor of
9151 vector elements by performing the loads using an integer type
9152 of the same size, constructing a vector of those and then
9153 re-interpreting it as the original vector type. This avoids a
9154 huge runtime penalty due to the general inability to perform
9155 store forwarding from smaller stores to a larger load. */
9158 = vector_vector_composition_type (vectype
,
9159 const_nunits
/ group_size
,
9161 if (vtype
!= NULL_TREE
)
9163 nloads
= const_nunits
/ group_size
;
9172 lnel
= const_nunits
;
9175 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9177 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9178 else if (nloads
== 1)
9183 /* For SLP permutation support we need to load the whole group,
9184 not only the number of vector stmts the permutation result
9188 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9190 unsigned int const_vf
= vf
.to_constant ();
9191 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9192 dr_chain
.create (ncopies
);
9195 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9197 unsigned int group_el
= 0;
9198 unsigned HOST_WIDE_INT
9199 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9200 for (j
= 0; j
< ncopies
; j
++)
9203 vec_alloc (v
, nloads
);
9204 gimple
*new_stmt
= NULL
;
9205 for (i
= 0; i
< nloads
; i
++)
9207 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9208 group_el
* elsz
+ cst_offset
);
9209 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9210 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9211 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9212 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9214 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9215 gimple_assign_lhs (new_stmt
));
9219 || group_el
== group_size
)
9221 tree newoff
= copy_ssa_name (running_off
);
9222 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9223 running_off
, stride_step
);
9224 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9226 running_off
= newoff
;
9232 tree vec_inv
= build_constructor (lvectype
, v
);
9233 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9234 vec_inv
, lvectype
, gsi
);
9235 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9236 if (lvectype
!= vectype
)
9238 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9240 build1 (VIEW_CONVERT_EXPR
,
9241 vectype
, new_temp
));
9242 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9249 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9251 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9256 *vec_stmt
= new_stmt
;
9257 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9263 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9269 if (memory_access_type
== VMAT_GATHER_SCATTER
9270 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9271 grouped_load
= false;
9275 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9276 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9277 /* For SLP vectorization we directly vectorize a subchain
9278 without permutation. */
9279 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9280 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9281 /* For BB vectorization always use the first stmt to base
9282 the data ref pointer on. */
9284 first_stmt_info_for_drptr
9285 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9287 /* Check if the chain of loads is already vectorized. */
9288 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9289 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9290 ??? But we can only do so if there is exactly one
9291 as we have no way to get at the rest. Leave the CSE
9293 ??? With the group load eventually participating
9294 in multiple different permutations (having multiple
9295 slp nodes which refer to the same group) the CSE
9296 is even wrong code. See PR56270. */
9299 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9302 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9305 /* VEC_NUM is the number of vect stmts to be created for this group. */
9308 grouped_load
= false;
9309 /* If an SLP permutation is from N elements to N elements,
9310 and if one vector holds a whole number of N, we can load
9311 the inputs to the permutation in the same way as an
9312 unpermuted sequence. In other cases we need to load the
9313 whole group, not only the number of vector stmts the
9314 permutation result fits in. */
9315 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9317 && (group_size
!= scalar_lanes
9318 || !multiple_p (nunits
, group_size
)))
9320 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9321 variable VF; see vect_transform_slp_perm_load. */
9322 unsigned int const_vf
= vf
.to_constant ();
9323 unsigned int const_nunits
= nunits
.to_constant ();
9324 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9325 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9329 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9331 = group_size
- scalar_lanes
;
9335 vec_num
= group_size
;
9337 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9341 first_stmt_info
= stmt_info
;
9342 first_dr_info
= dr_info
;
9343 group_size
= vec_num
= 1;
9345 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9347 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9350 gcc_assert (alignment_support_scheme
);
9351 vec_loop_masks
*loop_masks
9352 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9353 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9355 vec_loop_lens
*loop_lens
9356 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9357 ? &LOOP_VINFO_LENS (loop_vinfo
)
9360 /* Shouldn't go with length-based approach if fully masked. */
9361 gcc_assert (!loop_lens
|| !loop_masks
);
9363 /* Targets with store-lane instructions must not require explicit
9364 realignment. vect_supportable_dr_alignment always returns either
9365 dr_aligned or dr_unaligned_supported for masked operations. */
9366 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9369 || alignment_support_scheme
== dr_aligned
9370 || alignment_support_scheme
== dr_unaligned_supported
);
9372 /* In case the vectorization factor (VF) is bigger than the number
9373 of elements that we can fit in a vectype (nunits), we have to generate
9374 more than one vector stmt - i.e - we need to "unroll" the
9375 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9376 from one copy of the vector stmt to the next, in the field
9377 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9378 stages to find the correct vector defs to be used when vectorizing
9379 stmts that use the defs of the current stmt. The example below
9380 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9381 need to create 4 vectorized stmts):
9383 before vectorization:
9384 RELATED_STMT VEC_STMT
9388 step 1: vectorize stmt S1:
9389 We first create the vector stmt VS1_0, and, as usual, record a
9390 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9391 Next, we create the vector stmt VS1_1, and record a pointer to
9392 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9393 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9395 RELATED_STMT VEC_STMT
9396 VS1_0: vx0 = memref0 VS1_1 -
9397 VS1_1: vx1 = memref1 VS1_2 -
9398 VS1_2: vx2 = memref2 VS1_3 -
9399 VS1_3: vx3 = memref3 - -
9400 S1: x = load - VS1_0
9404 /* In case of interleaving (non-unit grouped access):
9411 Vectorized loads are created in the order of memory accesses
9412 starting from the access of the first stmt of the chain:
9415 VS2: vx1 = &base + vec_size*1
9416 VS3: vx3 = &base + vec_size*2
9417 VS4: vx4 = &base + vec_size*3
9419 Then permutation statements are generated:
9421 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9422 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9425 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9426 (the order of the data-refs in the output of vect_permute_load_chain
9427 corresponds to the order of scalar stmts in the interleaving chain - see
9428 the documentation of vect_permute_load_chain()).
9429 The generation of permutation stmts and recording them in
9430 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9432 In case of both multiple types and interleaving, the vector loads and
9433 permutation stmts above are created for every copy. The result vector
9434 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9435 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9437 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9438 on a target that supports unaligned accesses (dr_unaligned_supported)
9439 we generate the following code:
9443 p = p + indx * vectype_size;
9448 Otherwise, the data reference is potentially unaligned on a target that
9449 does not support unaligned accesses (dr_explicit_realign_optimized) -
9450 then generate the following code, in which the data in each iteration is
9451 obtained by two vector loads, one from the previous iteration, and one
9452 from the current iteration:
9454 msq_init = *(floor(p1))
9455 p2 = initial_addr + VS - 1;
9456 realignment_token = call target_builtin;
9459 p2 = p2 + indx * vectype_size
9461 vec_dest = realign_load (msq, lsq, realignment_token)
9466 /* If the misalignment remains the same throughout the execution of the
9467 loop, we can create the init_addr and permutation mask at the loop
9468 preheader. Otherwise, it needs to be created inside the loop.
9469 This can only occur when vectorizing memory accesses in the inner-loop
9470 nested within an outer-loop that is being vectorized. */
9472 if (nested_in_vect_loop
9473 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9474 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9476 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9477 compute_in_loop
= true;
9480 bool diff_first_stmt_info
9481 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9483 tree offset
= NULL_TREE
;
9484 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9485 || alignment_support_scheme
== dr_explicit_realign
)
9486 && !compute_in_loop
)
9488 /* If we have different first_stmt_info, we can't set up realignment
9489 here, since we can't guarantee first_stmt_info DR has been
9490 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9491 distance from first_stmt_info DR instead as below. */
9492 if (!diff_first_stmt_info
)
9493 msq
= vect_setup_realignment (vinfo
,
9494 first_stmt_info
, gsi
, &realignment_token
,
9495 alignment_support_scheme
, NULL_TREE
,
9497 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9499 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9500 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9502 gcc_assert (!first_stmt_info_for_drptr
);
9508 if (!known_eq (poffset
, 0))
9510 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9511 : size_int (poffset
));
9514 tree vec_offset
= NULL_TREE
;
9515 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9517 aggr_type
= NULL_TREE
;
9520 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9522 aggr_type
= elem_type
;
9523 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9524 &bump
, &vec_offset
);
9528 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9529 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9531 aggr_type
= vectype
;
9532 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9533 memory_access_type
);
9536 auto_vec
<tree
> vec_offsets
;
9537 auto_vec
<tree
> vec_masks
;
9541 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9544 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9545 &vec_masks
, mask_vectype
);
9547 tree vec_mask
= NULL_TREE
;
9548 poly_uint64 group_elt
= 0;
9549 for (j
= 0; j
< ncopies
; j
++)
9551 /* 1. Create the vector or array pointer update chain. */
9554 bool simd_lane_access_p
9555 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9556 if (simd_lane_access_p
9557 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9558 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9559 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9560 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9561 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9562 get_alias_set (TREE_TYPE (ref_type
)))
9563 && (alignment_support_scheme
== dr_aligned
9564 || alignment_support_scheme
== dr_unaligned_supported
))
9566 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9567 dataref_offset
= build_int_cst (ref_type
, 0);
9569 else if (diff_first_stmt_info
)
9572 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9573 aggr_type
, at_loop
, offset
, &dummy
,
9574 gsi
, &ptr_incr
, simd_lane_access_p
,
9576 /* Adjust the pointer by the difference to first_stmt. */
9577 data_reference_p ptrdr
9578 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9580 = fold_convert (sizetype
,
9581 size_binop (MINUS_EXPR
,
9582 DR_INIT (first_dr_info
->dr
),
9584 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9586 if (alignment_support_scheme
== dr_explicit_realign
)
9588 msq
= vect_setup_realignment (vinfo
,
9589 first_stmt_info_for_drptr
, gsi
,
9591 alignment_support_scheme
,
9592 dataref_ptr
, &at_loop
);
9593 gcc_assert (!compute_in_loop
);
9596 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9598 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9599 slp_node
, &gs_info
, &dataref_ptr
,
9604 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9606 offset
, &dummy
, gsi
, &ptr_incr
,
9607 simd_lane_access_p
, bump
);
9609 vec_mask
= vec_masks
[0];
9614 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9616 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9617 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9620 vec_mask
= vec_masks
[j
];
9623 if (grouped_load
|| slp_perm
)
9624 dr_chain
.create (vec_num
);
9626 gimple
*new_stmt
= NULL
;
9627 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9631 vec_array
= create_vector_array (vectype
, vec_num
);
9633 tree final_mask
= NULL_TREE
;
9635 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9638 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9639 final_mask
, vec_mask
, gsi
);
9645 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9647 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9648 tree alias_ptr
= build_int_cst (ref_type
, align
);
9649 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9650 dataref_ptr
, alias_ptr
,
9656 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9657 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9658 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9660 gimple_call_set_lhs (call
, vec_array
);
9661 gimple_call_set_nothrow (call
, true);
9662 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9665 /* Extract each vector into an SSA_NAME. */
9666 for (i
= 0; i
< vec_num
; i
++)
9668 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9670 dr_chain
.quick_push (new_temp
);
9673 /* Record the mapping between SSA_NAMEs and statements. */
9674 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9676 /* Record that VEC_ARRAY is now dead. */
9677 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9681 for (i
= 0; i
< vec_num
; i
++)
9683 tree final_mask
= NULL_TREE
;
9685 && memory_access_type
!= VMAT_INVARIANT
)
9686 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9688 vectype
, vec_num
* j
+ i
);
9690 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9691 final_mask
, vec_mask
, gsi
);
9693 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9694 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9695 gsi
, stmt_info
, bump
);
9697 /* 2. Create the vector-load in the loop. */
9698 switch (alignment_support_scheme
)
9701 case dr_unaligned_supported
:
9703 unsigned int misalign
;
9704 unsigned HOST_WIDE_INT align
;
9706 if (memory_access_type
== VMAT_GATHER_SCATTER
9707 && gs_info
.ifn
!= IFN_LAST
)
9709 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9710 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9711 tree zero
= build_zero_cst (vectype
);
9712 tree scale
= size_int (gs_info
.scale
);
9715 call
= gimple_build_call_internal
9716 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9717 vec_offset
, scale
, zero
, final_mask
);
9719 call
= gimple_build_call_internal
9720 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9721 vec_offset
, scale
, zero
);
9722 gimple_call_set_nothrow (call
, true);
9724 data_ref
= NULL_TREE
;
9727 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9729 /* Emulated gather-scatter. */
9730 gcc_assert (!final_mask
);
9731 unsigned HOST_WIDE_INT const_nunits
9732 = nunits
.to_constant ();
9733 unsigned HOST_WIDE_INT const_offset_nunits
9734 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9736 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9737 vec_alloc (ctor_elts
, const_nunits
);
9738 gimple_seq stmts
= NULL
;
9739 /* We support offset vectors with more elements
9740 than the data vector for now. */
9741 unsigned HOST_WIDE_INT factor
9742 = const_offset_nunits
/ const_nunits
;
9743 vec_offset
= vec_offsets
[j
/ factor
];
9744 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9745 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9746 tree scale
= size_int (gs_info
.scale
);
9748 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9749 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9751 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9753 tree boff
= size_binop (MULT_EXPR
,
9754 TYPE_SIZE (idx_type
),
9757 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9758 idx_type
, vec_offset
,
9759 TYPE_SIZE (idx_type
),
9761 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9762 idx
= gimple_build (&stmts
, MULT_EXPR
,
9763 sizetype
, idx
, scale
);
9764 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9765 TREE_TYPE (dataref_ptr
),
9767 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9768 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9769 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9770 build_int_cst (ref_type
, 0));
9771 new_stmt
= gimple_build_assign (elt
, ref
);
9772 gimple_seq_add_stmt (&stmts
, new_stmt
);
9773 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9775 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9776 new_stmt
= gimple_build_assign (NULL_TREE
,
9778 (vectype
, ctor_elts
));
9779 data_ref
= NULL_TREE
;
9784 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9785 if (alignment_support_scheme
== dr_aligned
)
9787 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9789 align
= dr_alignment
9790 (vect_dr_behavior (vinfo
, first_dr_info
));
9794 misalign
= misalignment
;
9795 if (dataref_offset
== NULL_TREE
9796 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9797 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9799 align
= least_bit_hwi (misalign
| align
);
9803 tree ptr
= build_int_cst (ref_type
,
9804 align
* BITS_PER_UNIT
);
9806 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9809 gimple_call_set_nothrow (call
, true);
9811 data_ref
= NULL_TREE
;
9813 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9816 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9819 tree ptr
= build_int_cst (ref_type
,
9820 align
* BITS_PER_UNIT
);
9822 machine_mode vmode
= TYPE_MODE (vectype
);
9823 opt_machine_mode new_ovmode
9824 = get_len_load_store_mode (vmode
, true);
9825 machine_mode new_vmode
= new_ovmode
.require ();
9826 tree qi_type
= unsigned_intQI_type_node
;
9828 signed char biasval
=
9829 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9831 tree bias
= build_int_cst (intQI_type_node
, biasval
);
9834 = gimple_build_call_internal (IFN_LEN_LOAD
, 4,
9837 gimple_call_set_nothrow (call
, true);
9839 data_ref
= NULL_TREE
;
9841 /* Need conversion if it's wrapped with VnQI. */
9842 if (vmode
!= new_vmode
)
9845 = build_vector_type_for_mode (qi_type
, new_vmode
);
9846 tree var
= vect_get_new_ssa_name (new_vtype
,
9848 gimple_set_lhs (call
, var
);
9849 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9851 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9853 = gimple_build_assign (vec_dest
,
9854 VIEW_CONVERT_EXPR
, op
);
9859 tree ltype
= vectype
;
9860 tree new_vtype
= NULL_TREE
;
9861 unsigned HOST_WIDE_INT gap
9862 = DR_GROUP_GAP (first_stmt_info
);
9863 unsigned int vect_align
9864 = vect_known_alignment_in_bytes (first_dr_info
,
9866 unsigned int scalar_dr_size
9867 = vect_get_scalar_dr_size (first_dr_info
);
9868 /* If there's no peeling for gaps but we have a gap
9869 with slp loads then load the lower half of the
9870 vector only. See get_group_load_store_type for
9871 when we apply this optimization. */
9874 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9876 && known_eq (nunits
, (group_size
- gap
) * 2)
9877 && known_eq (nunits
, group_size
)
9878 && gap
>= (vect_align
/ scalar_dr_size
))
9882 = vector_vector_composition_type (vectype
, 2,
9884 if (new_vtype
!= NULL_TREE
)
9888 = (dataref_offset
? dataref_offset
9889 : build_int_cst (ref_type
, 0));
9890 if (ltype
!= vectype
9891 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9893 unsigned HOST_WIDE_INT gap_offset
9894 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9895 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9896 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9899 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9900 if (alignment_support_scheme
== dr_aligned
)
9903 TREE_TYPE (data_ref
)
9904 = build_aligned_type (TREE_TYPE (data_ref
),
9905 align
* BITS_PER_UNIT
);
9906 if (ltype
!= vectype
)
9908 vect_copy_ref_info (data_ref
,
9909 DR_REF (first_dr_info
->dr
));
9910 tree tem
= make_ssa_name (ltype
);
9911 new_stmt
= gimple_build_assign (tem
, data_ref
);
9912 vect_finish_stmt_generation (vinfo
, stmt_info
,
9915 vec
<constructor_elt
, va_gc
> *v
;
9917 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9919 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9920 build_zero_cst (ltype
));
9921 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9925 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9926 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9927 build_zero_cst (ltype
));
9929 gcc_assert (new_vtype
!= NULL_TREE
);
9930 if (new_vtype
== vectype
)
9931 new_stmt
= gimple_build_assign (
9932 vec_dest
, build_constructor (vectype
, v
));
9935 tree new_vname
= make_ssa_name (new_vtype
);
9936 new_stmt
= gimple_build_assign (
9937 new_vname
, build_constructor (new_vtype
, v
));
9938 vect_finish_stmt_generation (vinfo
, stmt_info
,
9940 new_stmt
= gimple_build_assign (
9941 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9948 case dr_explicit_realign
:
9952 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9954 if (compute_in_loop
)
9955 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9957 dr_explicit_realign
,
9960 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9961 ptr
= copy_ssa_name (dataref_ptr
);
9963 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9964 // For explicit realign the target alignment should be
9965 // known at compile time.
9966 unsigned HOST_WIDE_INT align
=
9967 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9968 new_stmt
= gimple_build_assign
9969 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9971 (TREE_TYPE (dataref_ptr
),
9972 -(HOST_WIDE_INT
) align
));
9973 vect_finish_stmt_generation (vinfo
, stmt_info
,
9976 = build2 (MEM_REF
, vectype
, ptr
,
9977 build_int_cst (ref_type
, 0));
9978 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9979 vec_dest
= vect_create_destination_var (scalar_dest
,
9981 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9982 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9983 gimple_assign_set_lhs (new_stmt
, new_temp
);
9984 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9985 vect_finish_stmt_generation (vinfo
, stmt_info
,
9989 bump
= size_binop (MULT_EXPR
, vs
,
9990 TYPE_SIZE_UNIT (elem_type
));
9991 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9992 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9994 new_stmt
= gimple_build_assign
9995 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9997 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9998 ptr
= copy_ssa_name (ptr
, new_stmt
);
9999 gimple_assign_set_lhs (new_stmt
, ptr
);
10000 vect_finish_stmt_generation (vinfo
, stmt_info
,
10003 = build2 (MEM_REF
, vectype
, ptr
,
10004 build_int_cst (ref_type
, 0));
10007 case dr_explicit_realign_optimized
:
10009 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10010 new_temp
= copy_ssa_name (dataref_ptr
);
10012 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10013 // We should only be doing this if we know the target
10014 // alignment at compile time.
10015 unsigned HOST_WIDE_INT align
=
10016 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10017 new_stmt
= gimple_build_assign
10018 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
10019 build_int_cst (TREE_TYPE (dataref_ptr
),
10020 -(HOST_WIDE_INT
) align
));
10021 vect_finish_stmt_generation (vinfo
, stmt_info
,
10024 = build2 (MEM_REF
, vectype
, new_temp
,
10025 build_int_cst (ref_type
, 0));
10029 gcc_unreachable ();
10031 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10032 /* DATA_REF is null if we've already built the statement. */
10035 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10036 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10038 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10039 gimple_set_lhs (new_stmt
, new_temp
);
10040 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10042 /* 3. Handle explicit realignment if necessary/supported.
10044 vec_dest = realign_load (msq, lsq, realignment_token) */
10045 if (alignment_support_scheme
== dr_explicit_realign_optimized
10046 || alignment_support_scheme
== dr_explicit_realign
)
10048 lsq
= gimple_assign_lhs (new_stmt
);
10049 if (!realignment_token
)
10050 realignment_token
= dataref_ptr
;
10051 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10052 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
10053 msq
, lsq
, realignment_token
);
10054 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10055 gimple_assign_set_lhs (new_stmt
, new_temp
);
10056 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10058 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10061 if (i
== vec_num
- 1 && j
== ncopies
- 1)
10062 add_phi_arg (phi
, lsq
,
10063 loop_latch_edge (containing_loop
),
10069 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10071 tree perm_mask
= perm_mask_for_reverse (vectype
);
10072 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
10073 perm_mask
, stmt_info
, gsi
);
10074 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10077 /* Collect vector loads and later create their permutation in
10078 vect_transform_grouped_load (). */
10079 if (grouped_load
|| slp_perm
)
10080 dr_chain
.quick_push (new_temp
);
10082 /* Store vector loads in the corresponding SLP_NODE. */
10083 if (slp
&& !slp_perm
)
10084 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10086 /* With SLP permutation we load the gaps as well, without
10087 we need to skip the gaps after we manage to fully load
10088 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10089 group_elt
+= nunits
;
10090 if (maybe_ne (group_gap_adj
, 0U)
10092 && known_eq (group_elt
, group_size
- group_gap_adj
))
10094 poly_wide_int bump_val
10095 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10097 if (tree_int_cst_sgn
10098 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10099 bump_val
= -bump_val
;
10100 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10101 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10102 gsi
, stmt_info
, bump
);
10106 /* Bump the vector pointer to account for a gap or for excess
10107 elements loaded for a permuted SLP load. */
10108 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10110 poly_wide_int bump_val
10111 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10113 if (tree_int_cst_sgn
10114 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10115 bump_val
= -bump_val
;
10116 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10117 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10122 if (slp
&& !slp_perm
)
10128 /* For SLP we know we've seen all possible uses of dr_chain so
10129 direct vect_transform_slp_perm_load to DCE the unused parts.
10130 ??? This is a hack to prevent compile-time issues as seen
10131 in PR101120 and friends. */
10132 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10133 gsi
, vf
, false, &n_perms
,
10141 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10142 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10144 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10148 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10151 dr_chain
.release ();
10154 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10159 /* Function vect_is_simple_cond.
10162 LOOP - the loop that is being vectorized.
10163 COND - Condition that is checked for simple use.
10166 *COMP_VECTYPE - the vector type for the comparison.
10167 *DTS - The def types for the arguments of the comparison
10169 Returns whether a COND can be vectorized. Checks whether
10170 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): this region was mangled by extraction; the embedded
   original line numbers show gaps (braces, "return false" paths and
   some declarations are missing).  Comments below describe only what
   the visible fragments establish — do not treat them as a complete
   specification of the function.  */
10173 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10174 slp_tree slp_node
, tree
*comp_vectype
,
10175 enum vect_def_type
*dts
, tree vectype
)
/* Vector types discovered for the two comparison operands via
   vect_is_simple_use; NULL_TREE means the operand is invariant.  */
10178 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Case 1: COND is a scalar-boolean SSA_NAME used directly as a mask.
   Its def must be a simple use and its vectype a vector boolean.  */
10182 if (TREE_CODE (cond
) == SSA_NAME
10183 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10185 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10186 &slp_op
, &dts
[0], comp_vectype
)
10188 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Case 2: otherwise COND has to be a comparison tree; take it apart
   into its two operands.  */
10193 if (!COMPARISON_CLASS_P (cond
))
10196 lhs
= TREE_OPERAND (cond
, 0);
10197 rhs
= TREE_OPERAND (cond
, 1);
/* Analyze the LHS operand: SSA names go through vect_is_simple_use;
   literal constants get the vect_constant_def def-type directly.  */
10199 if (TREE_CODE (lhs
) == SSA_NAME
)
10201 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10202 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10205 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10206 || TREE_CODE (lhs
) == FIXED_CST
)
10207 dts
[0] = vect_constant_def
;
/* Same analysis for the RHS operand (operand index 1).  */
10211 if (TREE_CODE (rhs
) == SSA_NAME
)
10213 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10214 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10217 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10218 || TREE_CODE (rhs
) == FIXED_CST
)
10219 dts
[1] = vect_constant_def
;
/* The two operand vector types, when both known, must agree in the
   number of lanes.  */
10223 if (vectype1
&& vectype2
10224 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10225 TYPE_VECTOR_SUBPARTS (vectype2
)))
10228 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10229 /* Invariant comparison. */
10230 if (! *comp_vectype
)
10232 tree scalar_type
= TREE_TYPE (lhs
);
10233 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10234 *comp_vectype
= truth_type_for (vectype
);
10237 /* If we can widen the comparison to match vectype do so. */
10238 if (INTEGRAL_TYPE_P (scalar_type
)
10240 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10241 TYPE_SIZE (TREE_TYPE (vectype
))))
10242 scalar_type
= build_nonstandard_integer_type
10243 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
))
/* Derive the comparison vectype from the (possibly widened) scalar
   type of the invariant operands.  */
;
10244 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10252 /* vectorizable_condition.
10254 Check if STMT_INFO is conditional modify expression that can be vectorized.
10255 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10256 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10259 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10261 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): mangled by extraction — the embedded original line
   numbers show missing lines (braces, early returns, some statements).
   Comments below only annotate the visible fragments.  */
10264 vectorizable_condition (vec_info
*vinfo
,
10265 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10267 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
/* Scalar destination of the COND_EXPR and its vectorized counterpart.  */
10269 tree scalar_dest
= NULL_TREE
;
10270 tree vec_dest
= NULL_TREE
;
/* The condition itself and, when it is a comparison, its two operands.  */
10271 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10272 tree then_clause
, else_clause
;
10273 tree comp_vectype
= NULL_TREE
;
10274 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10275 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10278 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
/* Def types for the four COND_EXPR operands (cmp lhs/rhs, then, else).  */
10279 enum vect_def_type dts
[4]
10280 = {vect_unknown_def_type
, vect_unknown_def_type
,
10281 vect_unknown_def_type
, vect_unknown_def_type
};
/* bitop1/bitop2 encode an alternative bit-operation lowering of the
   comparison; NOP_EXPR means "not used".  */
10285 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10287 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10288 vec
<tree
> vec_oprnds0
= vNULL
;
10289 vec
<tree
> vec_oprnds1
= vNULL
;
10290 vec
<tree
> vec_oprnds2
= vNULL
;
10291 vec
<tree
> vec_oprnds3
= vNULL
;
/* True when the condition is already a mask SSA_NAME rather than an
   embedded comparison.  */
10293 bool masked
= false;
/* Irrelevant stmts are only handled during BB (SLP) vectorization.  */
10295 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10298 /* Is vectorizable conditional operation? */
10299 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10303 code
= gimple_assign_rhs_code (stmt
);
10304 if (code
!= COND_EXPR
)
/* Reduction bookkeeping: detect whether this COND_EXPR participates in
   a (possibly extract-last) reduction and remember which operand index
   carries the reduction value.  */
10307 stmt_vec_info reduc_info
= NULL
;
10308 int reduc_index
= -1;
10309 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10311 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10314 if (STMT_SLP_TYPE (stmt_info
))
10316 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10317 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10318 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10319 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10320 || reduc_index
!= -1);
10324 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10328 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10329 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Number of vector stmts: from the SLP node in the SLP case, otherwise
   computed as the number of copies for this vectype.  */
10334 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10338 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10342 gcc_assert (ncopies
>= 1);
10343 if (for_reduction
&& ncopies
> 1)
10344 return false; /* FORNOW */
10346 cond_expr
= gimple_assign_rhs1 (stmt
);
/* Validate the condition and the then/else operands via
   vect_is_simple_cond / vect_is_simple_use.  */
10348 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10349 &comp_vectype
, &dts
[0], vectype
)
/* An embedded comparison occupies one extra gimple operand slot.  */
10353 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10354 slp_tree then_slp_node
, else_slp_node
;
10355 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10356 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10358 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10359 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
/* Then/else vector types must be compatible with VECTYPE.  */
10362 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10365 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10368 masked
= !COMPARISON_CLASS_P (cond_expr
);
10369 vec_cmp_type
= truth_type_for (comp_vectype
);
10371 if (vec_cmp_type
== NULL_TREE
)
10374 cond_code
= TREE_CODE (cond_expr
);
10377 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10378 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10381 /* For conditional reductions, the "then" value needs to be the candidate
10382 value calculated by this iteration while the "else" value needs to be
10383 the result carried over from previous iterations. If the COND_EXPR
10384 is the other way around, we need to swap it. */
10385 bool must_invert_cmp_result
= false;
10386 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10389 must_invert_cmp_result
= true;
/* Prefer inverting the comparison code itself when that is valid for
   the operand type (honoring NaNs); fall back to inverting the result.  */
10392 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10393 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10394 if (new_code
== ERROR_MARK
)
10395 must_invert_cmp_result
= true;
10398 cond_code
= new_code
;
10399 /* Make sure we don't accidentally use the old condition. */
10400 cond_expr
= NULL_TREE
;
10403 std::swap (then_clause
, else_clause
);
10406 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10408 /* Boolean values may have another representation in vectors
10409 and therefore we prefer bit operations over comparison for
10410 them (which also works for scalar masks). We store opcodes
10411 to use in bitop1 and bitop2. Statement is vectorized as
10412 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10413 depending on bitop1 and bitop2 arity. */
/* NOTE(review): the switch/case heads selecting between the bitop
   combinations below were dropped by the extraction; only the
   assignments survive.  Presumably they dispatch on cond_code
   (GT/GE/LT/LE/NE/EQ) — compare vectorizable_comparison below.  */
10417 bitop1
= BIT_NOT_EXPR
;
10418 bitop2
= BIT_AND_EXPR
;
10421 bitop1
= BIT_NOT_EXPR
;
10422 bitop2
= BIT_IOR_EXPR
;
10425 bitop1
= BIT_NOT_EXPR
;
10426 bitop2
= BIT_AND_EXPR
;
10427 std::swap (cond_expr0
, cond_expr1
);
10430 bitop1
= BIT_NOT_EXPR
;
10431 bitop2
= BIT_IOR_EXPR
;
10432 std::swap (cond_expr0
, cond_expr1
);
10435 bitop1
= BIT_XOR_EXPR
;
10438 bitop1
= BIT_XOR_EXPR
;
10439 bitop2
= BIT_NOT_EXPR
;
/* Mark that the condition is now treated as a plain mask value.  */
10444 cond_code
= SSA_NAME
;
10447 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10448 && reduction_type
== EXTRACT_LAST_REDUCTION
10449 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10451 if (dump_enabled_p ())
10452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10453 "reduction comparison operation not supported.\n");
/* Verify the target supports the chosen bit operations on the
   comparison vector mode (optab lookup).  */
10459 if (bitop1
!= NOP_EXPR
)
10461 machine_mode mode
= TYPE_MODE (comp_vectype
);
10464 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10465 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10468 if (bitop2
!= NOP_EXPR
)
10470 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10472 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
/* Analysis phase: record the cost of the vectorized condition.  */
10477 vect_cost_for_stmt kind
= vector_stmt
;
10478 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10479 /* Count one reduction-like operation per vector. */
10480 kind
= vec_to_scalar
;
10481 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
/* In the SLP case, push the chosen vector types onto constant and
   invariant SLP children; failure means incompatible invariants.  */
10485 && (!vect_maybe_update_slp_op_vectype
10486 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10488 && !vect_maybe_update_slp_op_vectype
10489 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10490 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10491 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10493 if (dump_enabled_p ())
10494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10495 "incompatible vector types for invariants\n");
/* Conditional reductions restrict the use of partial vectors: only
   extract-last reductions record a loop mask, nested cycles tolerate
   inactive lanes, anything else disables partial vectors.  */
10499 if (loop_vinfo
&& for_reduction
10500 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10502 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10503 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10504 ncopies
* vec_num
, vectype
, NULL
);
10505 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10506 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10508 if (dump_enabled_p ())
10509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10510 "conditional reduction prevents the use"
10511 " of partial vectors.\n");
10512 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10516 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10517 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
/* Transform phase: create the vector destination (except for
   extract-last reductions, which write the scalar lhs directly).  */
10525 scalar_dest
= gimple_assign_lhs (stmt
);
10526 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10527 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10529 bool swap_cond_operands
= false;
10531 /* See whether another part of the vectorized code applies a loop
10532 mask to the condition, or to its inverse. */
10534 vec_loop_masks
*masks
= NULL
;
10535 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10537 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10538 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
/* Look the condition (and its inverse) up in the set of scalar
   conditions already known to be masked elsewhere, so the AND with the
   loop mask can be shared.  */
10541 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10542 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10543 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10546 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10547 tree_code orig_code
= cond
.code
;
10548 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10549 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10551 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10552 cond_code
= cond
.code
;
10553 swap_cond_operands
= true;
10557 /* Try the inverse of the current mask. We check if the
10558 inverse mask is live and if so we generate a negate of
10559 the current mask such that we still honor NaNs. */
10560 cond
.inverted_p
= true;
10561 cond
.code
= orig_code
;
10562 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10564 bitop1
= orig_code
;
10565 bitop2
= BIT_NOT_EXPR
;
10566 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10567 cond_code
= cond
.code
;
10568 swap_cond_operands
= true;
10575 /* Handle cond expr. */
/* Masked form: the condition is a single mask operand.  */
10577 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10578 cond_expr
, &vec_oprnds0
, comp_vectype
,
10579 then_clause
, &vec_oprnds2
, vectype
,
10580 reduction_type
!= EXTRACT_LAST_REDUCTION
10581 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
/* Unmasked form: the comparison's two operands are fetched separately.  */
10583 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10584 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10585 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10586 then_clause
, &vec_oprnds2
, vectype
,
10587 reduction_type
!= EXTRACT_LAST_REDUCTION
10588 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10590 /* Arguments are ready. Create the new vector stmt. */
10591 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10593 vec_then_clause
= vec_oprnds2
[i
];
10594 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10595 vec_else_clause
= vec_oprnds3
[i
];
10597 if (swap_cond_operands
)
10598 std::swap (vec_then_clause
, vec_else_clause
);
/* Masked case: the mask operand is already the comparison result.  */
10601 vec_compare
= vec_cond_lhs
;
10604 vec_cond_rhs
= vec_oprnds1
[i
];
/* No bit-op lowering: build the comparison directly.  */
10605 if (bitop1
== NOP_EXPR
)
10607 gimple_seq stmts
= NULL
;
10608 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10609 vec_cond_lhs
, vec_cond_rhs
);
10610 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
/* Bit-op lowering: emit bitop1 (unary NOT or binary) and then
   optionally bitop2 on top of its result.  */
10614 new_temp
= make_ssa_name (vec_cmp_type
);
10616 if (bitop1
== BIT_NOT_EXPR
)
10617 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10621 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10623 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10624 if (bitop2
== NOP_EXPR
)
10625 vec_compare
= new_temp
;
10626 else if (bitop2
== BIT_NOT_EXPR
)
10628 /* Instead of doing ~x ? y : z do x ? z : y. */
10629 vec_compare
= new_temp
;
10630 std::swap (vec_then_clause
, vec_else_clause
);
10634 vec_compare
= make_ssa_name (vec_cmp_type
);
10636 = gimple_build_assign (vec_compare
, bitop2
,
10637 vec_cond_lhs
, new_temp
);
10638 vect_finish_stmt_generation (vinfo
, stmt_info
,
10644 /* If we decided to apply a loop mask to the result of the vector
10645 comparison, AND the comparison with the mask now. Later passes
10646 should then be able to reuse the AND results between mulitple
10650 for (int i = 0; i < 100; ++i)
10651 x[i] = y[i] ? z[i] : 10;
10653 results in following optimized GIMPLE:
10655 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10656 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10657 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10658 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10659 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10660 vect_iftmp.11_47, { 10, ... }>;
10662 instead of using a masked and unmasked forms of
10663 vec != { 0, ... } (masked in the MASK_LOAD,
10664 unmasked in the VEC_COND_EXPR). */
10666 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10667 in cases where that's necessary. */
10669 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10671 if (!is_gimple_val (vec_compare
))
10673 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10674 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10676 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10677 vec_compare
= vec_compare_name
;
/* Invert the comparison result when the reduction operands were the
   wrong way around (see must_invert_cmp_result above).  */
10680 if (must_invert_cmp_result
)
10682 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10683 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10686 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10687 vec_compare
= vec_compare_name
;
/* AND the comparison result with the loop mask for this copy.  */
10693 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10695 tree tmp2
= make_ssa_name (vec_cmp_type
);
10697 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10699 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10700 vec_compare
= tmp2
;
/* Extract-last reduction: emit .FOLD_EXTRACT_LAST writing the original
   scalar lhs, replacing (or relocating) the original statement.  */
10705 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10707 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10708 tree lhs
= gimple_get_lhs (old_stmt
);
10709 new_stmt
= gimple_build_call_internal
10710 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10712 gimple_call_set_lhs (new_stmt
, lhs
);
10713 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10714 if (old_stmt
== gsi_stmt (*gsi
))
10715 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10718 /* In this case we're moving the definition to later in the
10719 block. That doesn't matter because the only uses of the
10720 lhs are in phi statements. */
10721 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10722 gsi_remove (&old_gsi
, true);
10723 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Normal case: emit the VEC_COND_EXPR selecting between the
   vectorized then/else clauses.  */
10728 new_temp
= make_ssa_name (vec_dest
);
10729 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10730 vec_then_clause
, vec_else_clause
);
10731 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the produced vector stmt on the SLP node or stmt_info.  */
10734 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10736 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10740 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Release the temporary operand vectors.  */
10742 vec_oprnds0
.release ();
10743 vec_oprnds1
.release ();
10744 vec_oprnds2
.release ();
10745 vec_oprnds3
.release ();
10750 /* vectorizable_comparison.
10752 Check if STMT_INFO is comparison expression that can be vectorized.
10753 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10754 comparison, put it in VEC_STMT, and insert it at GSI.
10756 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): mangled by extraction — the embedded original line
   numbers show missing lines (braces, early returns and some heads of
   the control flow).  Comments annotate only the visible fragments.  */
10759 vectorizable_comparison (vec_info
*vinfo
,
10760 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10762 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10764 tree lhs
, rhs1
, rhs2
;
/* Vector types of the two comparison operands; NULL_TREE while
   unknown/invariant.  */
10765 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10766 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10767 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10769 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10770 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10772 poly_uint64 nunits
;
/* bitop1/bitop2 hold an alternative bit-operation lowering for
   boolean-vector comparisons; NOP_EXPR means unused.  */
10774 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10776 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10777 vec
<tree
> vec_oprnds0
= vNULL
;
10778 vec
<tree
> vec_oprnds1
= vNULL
;
10782 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
/* The result of a vectorized comparison must be a vector boolean.  */
10785 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10788 mask_type
= vectype
;
10789 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10794 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10796 gcc_assert (ncopies
>= 1);
10797 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10800 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10804 code
= gimple_assign_rhs_code (stmt
);
10806 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
/* Analyze the two comparison operands.  */
10809 slp_tree slp_rhs1
, slp_rhs2
;
10810 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10811 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10814 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10815 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
/* Both operand vector types must agree in number of lanes.  */
10818 if (vectype1
&& vectype2
10819 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10820 TYPE_VECTOR_SUBPARTS (vectype2
)))
10823 vectype
= vectype1
? vectype1
: vectype2
;
10825 /* Invariant comparison. */
10828 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10829 vectype
= mask_type
;
10831 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10833 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10836 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10839 /* Can't compare mask and non-mask types. */
10840 if (vectype1
&& vectype2
10841 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10844 /* Boolean values may have another representation in vectors
10845 and therefore we prefer bit operations over comparison for
10846 them (which also works for scalar masks). We store opcodes
10847 to use in bitop1 and bitop2. Statement is vectorized as
10848 BITOP2 (rhs1 BITOP1 rhs2) or
10849 rhs1 BITOP2 (BITOP1 rhs2)
10850 depending on bitop1 and bitop2 arity. */
10851 bool swap_p
= false;
10852 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
/* Map ordering comparisons on boolean vectors onto NOT/AND/IOR
   combinations; EQ/NE become XOR (optionally followed by NOT).  */
10854 if (code
== GT_EXPR
)
10856 bitop1
= BIT_NOT_EXPR
;
10857 bitop2
= BIT_AND_EXPR
;
10859 else if (code
== GE_EXPR
)
10861 bitop1
= BIT_NOT_EXPR
;
10862 bitop2
= BIT_IOR_EXPR
;
10864 else if (code
== LT_EXPR
)
10866 bitop1
= BIT_NOT_EXPR
;
10867 bitop2
= BIT_AND_EXPR
;
10870 else if (code
== LE_EXPR
)
10872 bitop1
= BIT_NOT_EXPR
;
10873 bitop2
= BIT_IOR_EXPR
;
10878 bitop1
= BIT_XOR_EXPR
;
10879 if (code
== EQ_EXPR
)
10880 bitop2
= BIT_NOT_EXPR
;
/* Check target support: either a direct vector comparison, or optabs
   for the chosen bit operations.  */
10886 if (bitop1
== NOP_EXPR
)
10888 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10893 machine_mode mode
= TYPE_MODE (vectype
);
10896 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10897 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10900 if (bitop2
!= NOP_EXPR
)
10902 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10903 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10908 /* Put types on constant and invariant SLP children. */
10910 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10911 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10913 if (dump_enabled_p ())
10914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10915 "incompatible vector types for invariants\n");
/* Analysis phase: record stmt type and cost (bitop2 adds one stmt).  */
10919 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10920 vect_model_simple_cost (vinfo
, stmt_info
,
10921 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10922 dts
, ndts
, slp_node
, cost_vec
);
/* Transform phase: build the mask destination and fetch the
   vectorized operands.  */
10929 lhs
= gimple_assign_lhs (stmt
);
10930 mask
= vect_create_destination_var (lhs
, mask_type
);
10932 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10933 rhs1
, &vec_oprnds0
, vectype
,
10934 rhs2
, &vec_oprnds1
, vectype
);
/* swap_p case: the bitop mapping above required swapped operands.  */
10936 std::swap (vec_oprnds0
, vec_oprnds1
);
10938 /* Arguments are ready. Create the new vector stmt. */
10939 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10942 vec_rhs2
= vec_oprnds1
[i
];
10944 new_temp
= make_ssa_name (mask
);
/* Direct comparison when no bit-op lowering was selected.  */
10945 if (bitop1
== NOP_EXPR
)
10947 new_stmt
= gimple_build_assign (new_temp
, code
,
10948 vec_rhs1
, vec_rhs2
);
10949 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Otherwise emit bitop1 (unary NOT or binary) ...  */
10953 if (bitop1
== BIT_NOT_EXPR
)
10954 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10956 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10958 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* ... and then bitop2 on the intermediate result when required.  */
10959 if (bitop2
!= NOP_EXPR
)
10961 tree res
= make_ssa_name (mask
);
10962 if (bitop2
== BIT_NOT_EXPR
)
10963 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10965 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10967 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the produced vector stmt on the SLP node or stmt_info.  */
10971 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10973 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10977 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Release the temporary operand vectors.  */
10979 vec_oprnds0
.release ();
10980 vec_oprnds1
.release ();
10985 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10986 can handle all live statements in the node. Otherwise return true
10987 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10988 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
/* can_vectorize_live_stmts: return whether every "live" statement (a def
   used outside the vectorized region) covered by STMT_INFO, or by the
   scalar stmts of SLP_NODE, can be handled by vectorizable_live_operation.
   NOTE(review): this chunk is a line-mangled extraction with some source
   lines elided (e.g. the vec_stmt_p parameter declaration and the return
   statements); code tokens below are preserved byte-identically.  */
10991 can_vectorize_live_stmts (vec_info
*vinfo
,
10992 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10993 slp_tree slp_node
, slp_instance slp_node_instance
,
10995 stmt_vector_for_cost
*cost_vec
)
/* SLP path: each scalar stmt in the node is checked individually,
   passing its lane index I to vectorizable_live_operation.  */
10999 stmt_vec_info slp_stmt_info
;
11001 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
11003 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
11004 && !vectorizable_live_operation (vinfo
,
11005 slp_stmt_info
, gsi
, slp_node
,
11006 slp_node_instance
, i
,
11007 vec_stmt_p
, cost_vec
))
/* Non-SLP path: check STMT_INFO itself; -1 means "no SLP lane".  */
11011 else if (STMT_VINFO_LIVE_P (stmt_info
)
11012 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
11013 slp_node
, slp_node_instance
, -1,
11014 vec_stmt_p
, cost_vec
))
11020 /* Make sure the statement is vectorizable. */
/* vect_analyze_stmt: analysis entry point for one statement.  Checks
   that STMT_INFO is vectorizable: rejects volatile stmts, recurses into
   pattern statements and their def sequences, sanity-checks the def
   type against the computed relevance, and dispatches to the per-kind
   vectorizable_* analysis routines (analysis-only: transform arguments
   are passed as NULL).  Returns opt_result::success/failure.
   NOTE(review): line-mangled extraction; several source lines are
   elided (guards, returns, braces).  Code tokens kept byte-identical.  */
11023 vect_analyze_stmt (vec_info
*vinfo
,
11024 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
11025 slp_tree node
, slp_instance node_instance
,
11026 stmt_vector_for_cost
*cost_vec
)
11028 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11029 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
11031 gimple_seq pattern_def_seq
;
11033 if (dump_enabled_p ())
11034 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
/* Volatile operands can never be vectorized.  */
11037 if (gimple_has_volatile_ops (stmt_info
->stmt
))
11038 return opt_result::failure_at (stmt_info
->stmt
,
11040 " stmt has volatile operands: %G\n",
/* If the stmt was replaced by a pattern, analyze the pattern's
   def-sequence stmts that were marked relevant or live.  */
11043 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11045 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
11047 gimple_stmt_iterator si
;
11049 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
11051 stmt_vec_info pattern_def_stmt_info
11052 = vinfo
->lookup_stmt (gsi_stmt (si
));
11053 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
11054 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
11056 /* Analyze def stmt of STMT if it's a pattern stmt. */
11057 if (dump_enabled_p ())
11058 dump_printf_loc (MSG_NOTE
, vect_location
,
11059 "==> examining pattern def statement: %G",
11060 pattern_def_stmt_info
->stmt
);
/* Recursive analysis of the pattern def stmt.  */
11063 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
11064 need_to_vectorize
, node
, node_instance
,
11072 /* Skip stmts that do not need to be vectorized. In loops this is expected
11074 - the COND_EXPR which is the loop exit condition
11075 - any LABEL_EXPRs in the loop
11076 - computations that are used only for array indexing or loop control.
11077 In basic blocks we only analyze statements that are a part of some SLP
11078 instance, therefore, all the statements are relevant.
11080 Pattern statement needs to be analyzed instead of the original statement
11081 if the original statement is not relevant. Otherwise, we analyze both
11082 statements. In basic blocks we are called from some SLP instance
11083 traversal, don't analyze pattern stmts instead, the pattern stmts
11084 already will be part of SLP instance. */
11086 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11087 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11088 && !STMT_VINFO_LIVE_P (stmt_info
))
11090 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11091 && pattern_stmt_info
11092 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11093 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11095 /* Analyze PATTERN_STMT instead of the original stmt. */
11096 stmt_info
= pattern_stmt_info
;
11097 if (dump_enabled_p ())
11098 dump_printf_loc (MSG_NOTE
, vect_location
,
11099 "==> examining pattern statement: %G",
/* Neither the stmt nor its pattern is relevant/live: nothing to do.  */
11104 if (dump_enabled_p ())
11105 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11107 return opt_result::success ();
11110 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11112 && pattern_stmt_info
11113 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11114 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11116 /* Analyze PATTERN_STMT too. */
11117 if (dump_enabled_p ())
11118 dump_printf_loc (MSG_NOTE
, vect_location
,
11119 "==> examining pattern statement: %G",
11120 pattern_stmt_info
->stmt
);
11123 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11124 node_instance
, cost_vec
);
/* Sanity-check the def type against the relevance computed earlier;
   reduction/nested-cycle defs only occur in loop vectorization.  */
11129 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11131 case vect_internal_def
:
11134 case vect_reduction_def
:
11135 case vect_nested_cycle
:
11136 gcc_assert (!bb_vinfo
11137 && (relevance
== vect_used_in_outer
11138 || relevance
== vect_used_in_outer_by_reduction
11139 || relevance
== vect_used_by_reduction
11140 || relevance
== vect_unused_in_scope
11141 || relevance
== vect_used_only_live
));
11144 case vect_induction_def
:
11145 gcc_assert (!bb_vinfo
);
11148 case vect_constant_def
:
11149 case vect_external_def
:
11150 case vect_unknown_def_type
:
11152 gcc_unreachable ();
/* Temporarily install the SLP node's vector type for analysis;
   restored below before returning.  */
11155 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11157 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11159 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11161 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11162 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11163 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11164 *need_to_vectorize
= true;
/* Pure-SLP stmts were already handled during SLP analysis.  */
11167 if (PURE_SLP_STMT (stmt_info
) && !node
)
11169 if (dump_enabled_p ())
11170 dump_printf_loc (MSG_NOTE
, vect_location
,
11171 "handled only by SLP analysis\n");
11172 return opt_result::success ();
/* Loop-vectorization dispatch list (guard partially elided here).  */
11177 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11178 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11179 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11180 -mveclibabi= takes preference over library functions with
11181 the simd attribute. */
11182 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11183 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11185 || vectorizable_conversion (vinfo
, stmt_info
,
11186 NULL
, NULL
, node
, cost_vec
)
11187 || vectorizable_operation (vinfo
, stmt_info
,
11188 NULL
, NULL
, node
, cost_vec
)
11189 || vectorizable_assignment (vinfo
, stmt_info
,
11190 NULL
, NULL
, node
, cost_vec
)
11191 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11192 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11193 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11194 node
, node_instance
, cost_vec
)
11195 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11196 NULL
, node
, cost_vec
)
11197 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11198 || vectorizable_condition (vinfo
, stmt_info
,
11199 NULL
, NULL
, node
, cost_vec
)
11200 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11202 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11203 stmt_info
, NULL
, node
));
/* Second dispatch list — presumably the basic-block (SLP) path; its
   guard is elided in this extraction.  TODO(review): confirm against
   the full source.  */
11207 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11208 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11209 NULL
, NULL
, node
, cost_vec
)
11210 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11212 || vectorizable_shift (vinfo
, stmt_info
,
11213 NULL
, NULL
, node
, cost_vec
)
11214 || vectorizable_operation (vinfo
, stmt_info
,
11215 NULL
, NULL
, node
, cost_vec
)
11216 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11218 || vectorizable_load (vinfo
, stmt_info
,
11219 NULL
, NULL
, node
, cost_vec
)
11220 || vectorizable_store (vinfo
, stmt_info
,
11221 NULL
, NULL
, node
, cost_vec
)
11222 || vectorizable_condition (vinfo
, stmt_info
,
11223 NULL
, NULL
, node
, cost_vec
)
11224 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11226 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
/* Restore the vector type saved above.  */
11230 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11233 return opt_result::failure_at (stmt_info
->stmt
,
11235 " relevant stmt not supported: %G",
11238 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11239 need extra handling, except for vectorizable reductions. */
11241 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11242 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11243 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11244 stmt_info
, NULL
, node
, node_instance
,
11246 return opt_result::failure_at (stmt_info
->stmt
,
11248 " live stmt not supported: %G",
11251 return opt_result::success ();
11255 /* Function vect_transform_stmt.
11257 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
/* vect_transform_stmt: transform entry point.  Installs the SLP node's
   vector type, dispatches on STMT_VINFO_TYPE (as decided during
   analysis) to the per-kind transform routine which emits the vector
   stmts at GSI, then handles stmts whose defs are live outside the
   loop nest.  NOTE(review): line-mangled extraction; switch `break`s,
   return and several guards are elided.  Code kept byte-identical.  */
11260 vect_transform_stmt (vec_info
*vinfo
,
11261 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11262 slp_tree slp_node
, slp_instance slp_node_instance
)
11264 bool is_store
= false;
11265 gimple
*vec_stmt
= NULL
;
/* Pure-SLP stmts must only be transformed via their SLP node.  */
11268 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11270 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11272 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
/* Dispatch on the vectorization kind chosen during analysis.  */
11274 switch (STMT_VINFO_TYPE (stmt_info
))
11276 case type_demotion_vec_info_type
:
11277 case type_promotion_vec_info_type
:
11278 case type_conversion_vec_info_type
:
11279 done
= vectorizable_conversion (vinfo
, stmt_info
,
11280 gsi
, &vec_stmt
, slp_node
, NULL
);
11284 case induc_vec_info_type
:
11285 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11286 stmt_info
, &vec_stmt
, slp_node
,
11291 case shift_vec_info_type
:
11292 done
= vectorizable_shift (vinfo
, stmt_info
,
11293 gsi
, &vec_stmt
, slp_node
, NULL
);
11297 case op_vec_info_type
:
11298 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11303 case assignment_vec_info_type
:
11304 done
= vectorizable_assignment (vinfo
, stmt_info
,
11305 gsi
, &vec_stmt
, slp_node
, NULL
);
11309 case load_vec_info_type
:
11310 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11315 case store_vec_info_type
:
11316 done
= vectorizable_store (vinfo
, stmt_info
,
11317 gsi
, &vec_stmt
, slp_node
, NULL
);
11319 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11321 /* In case of interleaving, the whole chain is vectorized when the
11322 last store in the chain is reached. Store stmts before the last
11323 one are skipped, and their vec_stmt_info shouldn't be freed
11325 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11326 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11333 case condition_vec_info_type
:
11334 done
= vectorizable_condition (vinfo
, stmt_info
,
11335 gsi
, &vec_stmt
, slp_node
, NULL
);
11339 case comparison_vec_info_type
:
11340 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11345 case call_vec_info_type
:
11346 done
= vectorizable_call (vinfo
, stmt_info
,
11347 gsi
, &vec_stmt
, slp_node
, NULL
);
11350 case call_simd_clone_vec_info_type
:
11351 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11355 case reduc_vec_info_type
:
11356 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11357 gsi
, &vec_stmt
, slp_node
);
11361 case cycle_phi_info_type
:
11362 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11363 &vec_stmt
, slp_node
, slp_node_instance
);
11367 case lc_phi_info_type
:
11368 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11369 stmt_info
, &vec_stmt
, slp_node
);
11373 case phi_info_type
:
11374 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
/* Default switch case (label elided in this extraction): reaching here
   with a non-live stmt of unknown kind is a bug — hence the
   gcc_unreachable below.  */
11379 if (!STMT_VINFO_LIVE_P (stmt_info
))
11381 if (dump_enabled_p ())
11382 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11383 "stmt not supported.\n");
11384 gcc_unreachable ();
/* Non-SLP transforms that produced a stmt must have recorded it.  */
11389 if (!slp_node
&& vec_stmt
)
11390 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11392 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11394 /* Handle stmts whose DEF is used outside the loop-nest that is
11395 being vectorized. */
11396 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11397 slp_node_instance
, true, NULL
);
/* Restore the vector type saved at entry.  */
11402 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11408 /* Remove a group of stores (for SLP or interleaving), free their
/* vect_remove_stores: walk a grouped-store chain starting at
   FIRST_STMT_INFO via DR_GROUP_NEXT_ELEMENT and remove each scalar
   store (and its stmt_vec_info) from VINFO, mapping each element back
   to its original (pre-pattern) stmt first.  */
11412 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11414 stmt_vec_info next_stmt_info
= first_stmt_info
;
11416 while (next_stmt_info
)
/* Grab the chain successor before removal invalidates the element.  */
11418 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11419 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11420 /* Free the attached stmt_vec_info and remove the stmt. */
11421 vinfo
->remove_stmt (next_stmt_info
);
11422 next_stmt_info
= tmp
;
11426 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11427 elements of type SCALAR_TYPE, or null if the target doesn't support
11430 If NUNITS is zero, return a vector type that contains elements of
11431 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11433 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11434 for this vectorization region and want to "autodetect" the best choice.
11435 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11436 and we want the new type to be interoperable with it. PREVAILING_MODE
11437 in this case can be a scalar integer mode or a vector mode; when it
11438 is a vector mode, the function acts like a tree-level version of
11439 related_vector_mode. */
/* get_related_vectype_for_scalar_type: build a vector type with elements
   of SCALAR_TYPE.  If NUNITS is nonzero that many elements are requested;
   if zero, the element count is derived from PREVAILING_MODE (or the
   target's preferred SIMD mode when PREVAILING_MODE is VOIDmode).
   Returns NULL_TREE on failure (failure returns elided in this
   line-mangled extraction; code tokens kept byte-identical).  */
11442 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11443 tree scalar_type
, poly_uint64 nunits
)
11445 tree orig_scalar_type
= scalar_type
;
11446 scalar_mode inner_mode
;
11447 machine_mode simd_mode
;
/* Only integer and float element modes are supported.  */
11450 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11451 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11454 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11456 /* For vector types of elements whose mode precision doesn't
11457 match their type's precision we use an element type of mode
11458 precision. The vectorization routines will have to make sure
11459 they support the proper result truncation/extension.
11460 We also make sure to build vector types with INTEGER_TYPE
11461 component type only. */
11462 if (INTEGRAL_TYPE_P (scalar_type
)
11463 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11464 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11465 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11466 TYPE_UNSIGNED (scalar_type
));
11468 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11469 When the component mode passes the above test simply use a type
11470 corresponding to that mode. The theory is that any use that
11471 would cause problems with this will disable vectorization anyway. */
11472 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11473 && !INTEGRAL_TYPE_P (scalar_type
))
11474 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11476 /* We can't build a vector type of elements with alignment bigger than
11478 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11479 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11480 TYPE_UNSIGNED (scalar_type
));
11482 /* If we fell back to using the mode fail if there was
11483 no scalar type for it. */
11484 if (scalar_type
== NULL_TREE
)
11487 /* If no prevailing mode was supplied, use the mode the target prefers.
11488 Otherwise lookup a vector mode based on the prevailing mode. */
11489 if (prevailing_mode
== VOIDmode
)
11491 gcc_assert (known_eq (nunits
, 0U));
11492 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11493 if (SCALAR_INT_MODE_P (simd_mode
))
11495 /* Traditional behavior is not to take the integer mode
11496 literally, but simply to use it as a way of determining
11497 the vector size. It is up to mode_for_vector to decide
11498 what the TYPE_MODE should be.
11500 Note that nunits == 1 is allowed in order to support single
11501 element vector types. */
11502 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11503 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11507 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11508 || !related_vector_mode (prevailing_mode
,
11509 inner_mode
, nunits
).exists (&simd_mode
))
11511 /* Fall back to using mode_for_vector, mostly in the hope of being
11512 able to use an integer mode. */
11513 if (known_eq (nunits
, 0U)
11514 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11517 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11521 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11523 /* In cases where the mode was chosen by mode_for_vector, check that
11524 the target actually supports the chosen mode, or that it at least
11525 allows the vector mode to be replaced by a like-sized integer. */
11526 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11527 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11530 /* Re-attach the address-space qualifier if we canonicalized the scalar
11532 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11533 return build_qualified_type
11534 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11539 /* Function get_vectype_for_scalar_type.
11541 Returns the vector type corresponding to SCALAR_TYPE as supported
11542 by the target. If GROUP_SIZE is nonzero and we're performing BB
11543 vectorization, make sure that the number of elements in the vector
11544 is no bigger than GROUP_SIZE. */
/* get_vectype_for_scalar_type: target-supported vector type for
   SCALAR_TYPE under VINFO's chosen vector mode.  For BB vectorization,
   caps the element count at GROUP_SIZE by halving until a supported
   type is found.  Also records the natural choice of vector mode in
   VINFO->used_vector_modes.  NOTE(review): line-mangled extraction;
   some lines (arguments, loop body braces, return) are elided.  */
11547 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11548 unsigned int group_size
)
11550 /* For BB vectorization, we should always have a group size once we've
11551 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11552 are tentative requests during things like early data reference
11553 analysis and pattern recognition. */
11554 if (is_a
<bb_vec_info
> (vinfo
))
11555 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11559 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
/* First successful query fixes the region's vector mode.  */
11561 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11562 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11564 /* Register the natural choice of vector type, before the group size
11565 has been applied. */
11567 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11569 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11570 try again with an explicit number of elements. */
11573 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11575 /* Start with the biggest number of units that fits within
11576 GROUP_SIZE and halve it until we find a valid vector type.
11577 Usually either the first attempt will succeed or all will
11578 fail (in the latter case because GROUP_SIZE is too small
11579 for the target), but it's possible that a target could have
11580 a hole between supported vector types.
11582 If GROUP_SIZE is not a power of 2, this has the effect of
11583 trying the largest power of 2 that fits within the group,
11584 even though the group is not a multiple of that vector size.
11585 The BB vectorizer will then try to carve up the group into
11587 unsigned int nunits
= 1 << floor_log2 (group_size
);
11590 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11591 scalar_type
, nunits
);
11594 while (nunits
> 1 && !vectype
);
11600 /* Return the vector type corresponding to SCALAR_TYPE as supported
11601 by the target. NODE, if nonnull, is the SLP tree node that will
11602 use the returned vector type. */
/* Convenience overload: derive GROUP_SIZE from the SLP node's lane
   count (0 when NODE is null — the guard is elided in this extraction)
   and forward to the GROUP_SIZE overload above.  */
11605 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11607 unsigned int group_size
= 0;
11609 group_size
= SLP_TREE_LANES (node
);
11610 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11613 /* Function get_mask_type_for_scalar_type.
11615 Returns the mask type corresponding to a result of comparison
11616 of vectors of specified SCALAR_TYPE as supported by target.
11617 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11618 make sure that the number of elements in the vector is no bigger
11619 than GROUP_SIZE. */
/* get_mask_type_for_scalar_type: mask (boolean vector) type produced by
   comparing vectors of SCALAR_TYPE — the truth type of the vector type
   chosen by get_vectype_for_scalar_type.  (Null-vectype guard elided in
   this extraction.)  */
11622 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11623 unsigned int group_size
)
11625 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11630 return truth_type_for (vectype
);
11633 /* Function get_same_sized_vectype
11635 Returns a vector type corresponding to SCALAR_TYPE of size
11636 VECTOR_TYPE if supported by the target. */
/* get_same_sized_vectype: vector of SCALAR_TYPE elements with the same
   total size as VECTOR_TYPE.  Scalar booleans map to VECTOR_TYPE's
   truth type; otherwise the element count is total-size / element-size.  */
11639 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11641 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11642 return truth_type_for (vector_type
);
11644 poly_uint64 nunits
;
11645 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11646 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11649 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11650 scalar_type
, nunits
);
11653 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11654 would not change the chosen vector modes. */
/* vect_chooses_same_modes_p: would switching VINFO to VECTOR_MODE keep
   every previously used vector mode unchanged?  Checks each recorded
   mode against the related_vector_mode it would map to.  (The early
   return and final return are elided in this extraction.)  */
11657 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11659 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11660 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11661 if (!VECTOR_MODE_P (*i
)
11662 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11667 /* Function vect_is_simple_use.
11670 VINFO - the vect info of the loop or basic block that is being vectorized.
11671 OPERAND - operand in the loop or bb.
11673 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11674 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11675 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11676 the definition could be anywhere in the function
11677 DT - the type of definition
11679 Returns whether a stmt with OPERAND can be vectorized.
11680 For loops, supportable operands are constants, loop invariants, and operands
11681 that are defined by the current iteration of the loop. Unsupportable
11682 operands are those that are defined by a previous iteration of the loop (as
11683 is the case in reduction/induction computations).
11684 For basic blocks, supportable operands are constants and bb invariants.
11685 For now, operands defined outside the basic block are not supported. */
/* vect_is_simple_use (base overload): classify OPERAND's definition
   into *DT (constant / external / internal / induction / reduction /
   ...), optionally returning the defining stmt via DEF_STMT_INFO_OUT
   and DEF_STMT_OUT.  Returns false for definitions the vectorizer
   cannot handle.  NOTE(review): line-mangled extraction; guards,
   `break`s and the final returns are elided.  */
11688 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11689 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11691 if (def_stmt_info_out
)
11692 *def_stmt_info_out
= NULL
;
11694 *def_stmt_out
= NULL
;
11695 *dt
= vect_unknown_def_type
;
11697 if (dump_enabled_p ())
11699 dump_printf_loc (MSG_NOTE
, vect_location
,
11700 "vect_is_simple_use: operand ");
11701 if (TREE_CODE (operand
) == SSA_NAME
11702 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11703 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11705 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
/* Classification: constants, invariants, non-SSA and default defs are
   handled directly; other SSA names via their defining stmt.  */
11708 if (CONSTANT_CLASS_P (operand
))
11709 *dt
= vect_constant_def
;
11710 else if (is_gimple_min_invariant (operand
))
11711 *dt
= vect_external_def
;
11712 else if (TREE_CODE (operand
) != SSA_NAME
)
11713 *dt
= vect_unknown_def_type
;
11714 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11715 *dt
= vect_external_def
;
11718 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11719 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
/* A def outside the vectorized region is external.  */
11721 *dt
= vect_external_def
;
/* Otherwise use the (possibly pattern-replaced) defining stmt.  */
11724 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11725 def_stmt
= stmt_vinfo
->stmt
;
11726 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11727 if (def_stmt_info_out
)
11728 *def_stmt_info_out
= stmt_vinfo
;
11731 *def_stmt_out
= def_stmt
;
/* Dump the classification (switch on *dt; `break`s elided).  */
11734 if (dump_enabled_p ())
11736 dump_printf (MSG_NOTE
, ", type of def: ");
11739 case vect_uninitialized_def
:
11740 dump_printf (MSG_NOTE
, "uninitialized\n");
11742 case vect_constant_def
:
11743 dump_printf (MSG_NOTE
, "constant\n");
11745 case vect_external_def
:
11746 dump_printf (MSG_NOTE
, "external\n");
11748 case vect_internal_def
:
11749 dump_printf (MSG_NOTE
, "internal\n");
11751 case vect_induction_def
:
11752 dump_printf (MSG_NOTE
, "induction\n");
11754 case vect_reduction_def
:
11755 dump_printf (MSG_NOTE
, "reduction\n");
11757 case vect_double_reduction_def
:
11758 dump_printf (MSG_NOTE
, "double reduction\n");
11760 case vect_nested_cycle
:
11761 dump_printf (MSG_NOTE
, "nested cycle\n");
11763 case vect_unknown_def_type
:
11764 dump_printf (MSG_NOTE
, "unknown\n");
11769 if (*dt
== vect_unknown_def_type
)
11771 if (dump_enabled_p ())
11772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11773 "Unsupported pattern.\n");
11780 /* Function vect_is_simple_use.
11782 Same as vect_is_simple_use but also determines the vector operand
11783 type of OPERAND and stores it to *VECTYPE. If the definition of
11784 OPERAND is vect_uninitialized_def, vect_constant_def or
11785 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11786 is responsible to compute the best suited vector type for the
/* vect_is_simple_use (vectype overload): like the base overload but
   additionally returns the def's vector type in *VECTYPE — the def
   stmt's STMT_VINFO_VECTYPE for region-internal defs, NULL_TREE for
   constant/external/uninitialized defs (caller picks a type).  */
11790 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11791 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11792 gimple
**def_stmt_out
)
11794 stmt_vec_info def_stmt_info
;
11796 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11800 *def_stmt_out
= def_stmt
;
11801 if (def_stmt_info_out
)
11802 *def_stmt_info_out
= def_stmt_info
;
11804 /* Now get a vector type if the def is internal, otherwise supply
11805 NULL_TREE and leave it up to the caller to figure out a proper
11806 type for the use stmt. */
11807 if (*dt
== vect_internal_def
11808 || *dt
== vect_induction_def
11809 || *dt
== vect_reduction_def
11810 || *dt
== vect_double_reduction_def
11811 || *dt
== vect_nested_cycle
)
11813 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11814 gcc_assert (*vectype
!= NULL_TREE
);
11815 if (dump_enabled_p ())
11816 dump_printf_loc (MSG_NOTE
, vect_location
,
11817 "vect_is_simple_use: vectype %T\n", *vectype
);
11819 else if (*dt
== vect_uninitialized_def
11820 || *dt
== vect_constant_def
11821 || *dt
== vect_external_def
)
11822 *vectype
= NULL_TREE
;
/* Any other def type is a bug.  */
11824 gcc_unreachable ();
11829 /* Function vect_is_simple_use.
11831 Same as vect_is_simple_use but determines the operand by operand
11832 position OPERAND from either STMT or SLP_NODE, filling in *OP
11833 and *SLP_DEF (when SLP_NODE is not NULL). */
/* vect_is_simple_use (operand-position overload): fetch operand number
   OPERAND from either SLP_NODE (via its child node) or STMT (decoding
   gassign/gcall operand layout), store it in *OP (and the child in
   *SLP_DEF for SLP), then classify it like the other overloads.
   NOTE(review): line-mangled extraction; guards/returns elided.  */
11836 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11837 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11838 enum vect_def_type
*dt
,
11839 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
/* SLP path: the operand is the SLP child node at index OPERAND.  */
11843 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11845 *vectype
= SLP_TREE_VECTYPE (child
);
11846 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11848 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11849 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
/* External/constant SLP child: take the first scalar operand.  */
11853 if (def_stmt_info_out
)
11854 *def_stmt_info_out
= NULL
;
11855 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11856 *dt
= SLP_TREE_DEF_TYPE (child
);
/* Non-SLP path: decode the operand from the stmt itself; COND_EXPRs
   with embedded comparisons and VIEW_CONVERT_EXPRs need unwrapping.  */
11863 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11865 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11866 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11869 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11871 *op
= gimple_op (ass
, operand
);
11873 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11874 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
/* Plain assign: operand 0 is the lhs, so shift by one.  */
11876 *op
= gimple_op (ass
, operand
+ 1);
11878 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11879 *op
= gimple_call_arg (call
, operand
);
11881 gcc_unreachable ();
11882 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11886 /* If OP is not NULL and is external or constant update its vector
11887 type with VECTYPE. Returns true if successful or false if not,
11888 for example when conflicting vector types are present. */
/* vect_maybe_update_slp_op_vectype: put VECTYPE on a constant/invariant
   SLP child OP.  Internal defs (and a null OP) need no update; an
   already-set vectype must be compatible; boolean vector types are
   refused for external defs.  (The true/false returns are elided in
   this line-mangled extraction.)  */
11891 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11893 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11895 if (SLP_TREE_VECTYPE (op
))
11896 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11897 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
11898 should be handled by patterns. Allow vect_constant_def for now. */
11899 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
11900 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
11902 SLP_TREE_VECTYPE (op
) = vectype
;
11906 /* Function supportable_widening_operation
11908 Check whether an operation represented by the code CODE is a
11909 widening operation that is supported by the target platform in
11910 vector form (i.e., when operating on arguments of type VECTYPE_IN
11911 producing a result of type VECTYPE_OUT).
11913 Widening operations we currently support are NOP (CONVERT), FLOAT,
11914 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11915 are supported by the target platform either directly (via vector
11916 tree-codes), or via target builtins.
11919 - CODE1 and CODE2 are codes of vector operations to be used when
11920 vectorizing the operation, if available.
11921 - MULTI_STEP_CVT determines the number of required intermediate steps in
11922 case of multi-step conversion (like char->short->int - in that case
11923 MULTI_STEP_CVT will be 1).
11924 - INTERM_TYPES contains the intermediate type required to perform the
11925 widening operation (short in the above example). */
11928 supportable_widening_operation (vec_info
*vinfo
,
11929 enum tree_code code
, stmt_vec_info stmt_info
,
11930 tree vectype_out
, tree vectype_in
,
11931 enum tree_code
*code1
, enum tree_code
*code2
,
11932 int *multi_step_cvt
,
11933 vec
<tree
> *interm_types
)
11935 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11936 class loop
*vect_loop
= NULL
;
11937 machine_mode vec_mode
;
11938 enum insn_code icode1
, icode2
;
11939 optab optab1
, optab2
;
11940 tree vectype
= vectype_in
;
11941 tree wide_vectype
= vectype_out
;
11942 enum tree_code c1
, c2
;
11944 tree prev_type
, intermediate_type
;
11945 machine_mode intermediate_mode
, prev_mode
;
11946 optab optab3
, optab4
;
11948 *multi_step_cvt
= 0;
11950 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11954 case WIDEN_MULT_EXPR
:
11955 /* The result of a vectorized widening operation usually requires
11956 two vectors (because the widened results do not fit into one vector).
11957 The generated vector results would normally be expected to be
11958 generated in the same order as in the original scalar computation,
11959 i.e. if 8 results are generated in each vector iteration, they are
11960 to be organized as follows:
11961 vect1: [res1,res2,res3,res4],
11962 vect2: [res5,res6,res7,res8].
11964 However, in the special case that the result of the widening
11965 operation is used in a reduction computation only, the order doesn't
11966 matter (because when vectorizing a reduction we change the order of
11967 the computation). Some targets can take advantage of this and
11968 generate more efficient code. For example, targets like Altivec,
11969 that support widen_mult using a sequence of {mult_even,mult_odd}
11970 generate the following vectors:
11971 vect1: [res1,res3,res5,res7],
11972 vect2: [res2,res4,res6,res8].
11974 When vectorizing outer-loops, we execute the inner-loop sequentially
11975 (each vectorized inner-loop iteration contributes to VF outer-loop
11976 iterations in parallel). We therefore don't allow to change the
11977 order of the computation in the inner-loop during outer-loop
11979 /* TODO: Another case in which order doesn't *really* matter is when we
11980 widen and then contract again, e.g. (short)((int)x * y >> 8).
11981 Normally, pack_trunc performs an even/odd permute, whereas the
11982 repack from an even/odd expansion would be an interleave, which
11983 would be significantly simpler for e.g. AVX2. */
11984 /* In any case, in order to avoid duplicating the code below, recurse
11985 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11986 are properly set up for the caller. If we fail, we'll continue with
11987 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11989 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11990 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11991 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11992 stmt_info
, vectype_out
,
11993 vectype_in
, code1
, code2
,
11994 multi_step_cvt
, interm_types
))
11996 /* Elements in a vector with vect_used_by_reduction property cannot
11997 be reordered if the use chain with this property does not have the
11998 same operation. One such an example is s += a * b, where elements
11999 in a and b cannot be reordered. Here we check if the vector defined
12000 by STMT is only directly used in the reduction statement. */
12001 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
12002 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
12004 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
12007 c1
= VEC_WIDEN_MULT_LO_EXPR
;
12008 c2
= VEC_WIDEN_MULT_HI_EXPR
;
12011 case DOT_PROD_EXPR
:
12012 c1
= DOT_PROD_EXPR
;
12013 c2
= DOT_PROD_EXPR
;
12021 case VEC_WIDEN_MULT_EVEN_EXPR
:
12022 /* Support the recursion induced just above. */
12023 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
12024 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
12027 case WIDEN_LSHIFT_EXPR
:
12028 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
12029 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
12032 case WIDEN_PLUS_EXPR
:
12033 c1
= VEC_WIDEN_PLUS_LO_EXPR
;
12034 c2
= VEC_WIDEN_PLUS_HI_EXPR
;
12037 case WIDEN_MINUS_EXPR
:
12038 c1
= VEC_WIDEN_MINUS_LO_EXPR
;
12039 c2
= VEC_WIDEN_MINUS_HI_EXPR
;
12043 c1
= VEC_UNPACK_LO_EXPR
;
12044 c2
= VEC_UNPACK_HI_EXPR
;
12048 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
12049 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
12052 case FIX_TRUNC_EXPR
:
12053 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
12054 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
12058 gcc_unreachable ();
12061 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
12062 std::swap (c1
, c2
);
12064 if (code
== FIX_TRUNC_EXPR
)
12066 /* The signedness is determined from output operand. */
12067 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12068 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
12070 else if (CONVERT_EXPR_CODE_P (code
)
12071 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
12072 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12073 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
12074 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12076 /* If the input and result modes are the same, a different optab
12077 is needed where we pass in the number of units in vectype. */
12078 optab1
= vec_unpacks_sbool_lo_optab
;
12079 optab2
= vec_unpacks_sbool_hi_optab
;
12083 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12084 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
12087 if (!optab1
|| !optab2
)
12090 vec_mode
= TYPE_MODE (vectype
);
12091 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
12092 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
12098 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12099 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12101 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12103 /* For scalar masks we may have different boolean
12104 vector types having the same QImode. Thus we
12105 add additional check for elements number. */
12106 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
12107 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12111 /* Check if it's a multi-step conversion that can be done using intermediate
12114 prev_type
= vectype
;
12115 prev_mode
= vec_mode
;
12117 if (!CONVERT_EXPR_CODE_P (code
))
12120 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12121 intermediate steps in promotion sequence. We try
12122 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12124 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12125 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12127 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12128 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12130 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
12133 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
12134 TYPE_UNSIGNED (prev_type
));
12136 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12137 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12138 && intermediate_mode
== prev_mode
12139 && SCALAR_INT_MODE_P (prev_mode
))
12141 /* If the input and result modes are the same, a different optab
12142 is needed where we pass in the number of units in vectype. */
12143 optab3
= vec_unpacks_sbool_lo_optab
;
12144 optab4
= vec_unpacks_sbool_hi_optab
;
12148 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12149 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
12152 if (!optab3
|| !optab4
12153 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12154 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12155 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12156 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12157 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12158 == CODE_FOR_nothing
)
12159 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12160 == CODE_FOR_nothing
))
12163 interm_types
->quick_push (intermediate_type
);
12164 (*multi_step_cvt
)++;
12166 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12167 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12169 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12171 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12172 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12176 prev_type
= intermediate_type
;
12177 prev_mode
= intermediate_mode
;
12180 interm_types
->release ();
12185 /* Function supportable_narrowing_operation
12187 Check whether an operation represented by the code CODE is a
12188 narrowing operation that is supported by the target platform in
12189 vector form (i.e., when operating on arguments of type VECTYPE_IN
12190 and producing a result of type VECTYPE_OUT).
12192 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12193 and FLOAT. This function checks if these operations are supported by
12194 the target platform directly via vector tree-codes.
12197 - CODE1 is the code of a vector operation to be used when
12198 vectorizing the operation, if available.
12199 - MULTI_STEP_CVT determines the number of required intermediate steps in
12200 case of multi-step conversion (like int->short->char - in that case
12201 MULTI_STEP_CVT will be 1).
12202 - INTERM_TYPES contains the intermediate type required to perform the
12203 narrowing operation (short in the above example). */
12206 supportable_narrowing_operation (enum tree_code code
,
12207 tree vectype_out
, tree vectype_in
,
12208 enum tree_code
*code1
, int *multi_step_cvt
,
12209 vec
<tree
> *interm_types
)
12211 machine_mode vec_mode
;
12212 enum insn_code icode1
;
12213 optab optab1
, interm_optab
;
12214 tree vectype
= vectype_in
;
12215 tree narrow_vectype
= vectype_out
;
12217 tree intermediate_type
, prev_type
;
12218 machine_mode intermediate_mode
, prev_mode
;
12220 unsigned HOST_WIDE_INT n_elts
;
12223 *multi_step_cvt
= 0;
12227 c1
= VEC_PACK_TRUNC_EXPR
;
12228 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12229 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12230 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
12231 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
12232 && n_elts
< BITS_PER_UNIT
)
12233 optab1
= vec_pack_sbool_trunc_optab
;
12235 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12238 case FIX_TRUNC_EXPR
:
12239 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12240 /* The signedness is determined from output operand. */
12241 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12245 c1
= VEC_PACK_FLOAT_EXPR
;
12246 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12250 gcc_unreachable ();
12256 vec_mode
= TYPE_MODE (vectype
);
12257 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
12262 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12264 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12266 /* For scalar masks we may have different boolean
12267 vector types having the same QImode. Thus we
12268 add additional check for elements number. */
12269 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12270 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12274 if (code
== FLOAT_EXPR
)
12277 /* Check if it's a multi-step conversion that can be done using intermediate
12279 prev_mode
= vec_mode
;
12280 prev_type
= vectype
;
12281 if (code
== FIX_TRUNC_EXPR
)
12282 uns
= TYPE_UNSIGNED (vectype_out
);
12284 uns
= TYPE_UNSIGNED (vectype
);
12286 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12287 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12288 costly than signed. */
12289 if (code
== FIX_TRUNC_EXPR
&& uns
)
12291 enum insn_code icode2
;
12294 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12296 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12297 if (interm_optab
!= unknown_optab
12298 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12299 && insn_data
[icode1
].operand
[0].mode
12300 == insn_data
[icode2
].operand
[0].mode
)
12303 optab1
= interm_optab
;
12308 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12309 intermediate steps in promotion sequence. We try
12310 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12311 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12312 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12314 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12315 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12317 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12320 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12321 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12322 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12323 && SCALAR_INT_MODE_P (prev_mode
)
12324 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
12325 && n_elts
< BITS_PER_UNIT
)
12326 interm_optab
= vec_pack_sbool_trunc_optab
;
12329 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12332 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12333 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12334 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12335 == CODE_FOR_nothing
))
12338 interm_types
->quick_push (intermediate_type
);
12339 (*multi_step_cvt
)++;
12341 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12343 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12345 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12346 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12350 prev_mode
= intermediate_mode
;
12351 prev_type
= intermediate_type
;
12352 optab1
= interm_optab
;
12355 interm_types
->release ();
12359 /* Generate and return a vector mask of MASK_TYPE such that
12360 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12361 Add the statements to SEQ. */
12364 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12365 tree end_index
, const char *name
)
12367 tree cmp_type
= TREE_TYPE (start_index
);
12368 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12369 cmp_type
, mask_type
,
12370 OPTIMIZE_FOR_SPEED
));
12371 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12372 start_index
, end_index
,
12373 build_zero_cst (mask_type
));
12376 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
12378 tmp
= make_ssa_name (mask_type
);
12379 gimple_call_set_lhs (call
, tmp
);
12380 gimple_seq_add_stmt (seq
, call
);
12384 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12385 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12388 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12391 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
12392 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12395 /* Try to compute the vector types required to vectorize STMT_INFO,
12396 returning true on success and false if vectorization isn't possible.
12397 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12398 take sure that the number of elements in the vectors is no bigger
12403 - Set *STMT_VECTYPE_OUT to:
12404 - NULL_TREE if the statement doesn't need to be vectorized;
12405 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12407 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12408 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12409 statement does not help to determine the overall number of units. */
12412 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12413 tree
*stmt_vectype_out
,
12414 tree
*nunits_vectype_out
,
12415 unsigned int group_size
)
12417 gimple
*stmt
= stmt_info
->stmt
;
12419 /* For BB vectorization, we should always have a group size once we've
12420 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12421 are tentative requests during things like early data reference
12422 analysis and pattern recognition. */
12423 if (is_a
<bb_vec_info
> (vinfo
))
12424 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12428 *stmt_vectype_out
= NULL_TREE
;
12429 *nunits_vectype_out
= NULL_TREE
;
12431 if (gimple_get_lhs (stmt
) == NULL_TREE
12432 /* MASK_STORE has no lhs, but is ok. */
12433 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12435 if (is_a
<gcall
*> (stmt
))
12437 /* Ignore calls with no lhs. These must be calls to
12438 #pragma omp simd functions, and what vectorization factor
12439 it really needs can't be determined until
12440 vectorizable_simd_clone_call. */
12441 if (dump_enabled_p ())
12442 dump_printf_loc (MSG_NOTE
, vect_location
,
12443 "defer to SIMD clone analysis.\n");
12444 return opt_result::success ();
12447 return opt_result::failure_at (stmt
,
12448 "not vectorized: irregular stmt.%G", stmt
);
12452 tree scalar_type
= NULL_TREE
;
12453 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12455 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12456 if (dump_enabled_p ())
12457 dump_printf_loc (MSG_NOTE
, vect_location
,
12458 "precomputed vectype: %T\n", vectype
);
12460 else if (vect_use_mask_type_p (stmt_info
))
12462 unsigned int precision
= stmt_info
->mask_precision
;
12463 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12464 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12466 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12467 " data-type %T\n", scalar_type
);
12468 if (dump_enabled_p ())
12469 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12473 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12474 scalar_type
= TREE_TYPE (DR_REF (dr
));
12475 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12476 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12478 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12480 if (dump_enabled_p ())
12483 dump_printf_loc (MSG_NOTE
, vect_location
,
12484 "get vectype for scalar type (group size %d):"
12485 " %T\n", group_size
, scalar_type
);
12487 dump_printf_loc (MSG_NOTE
, vect_location
,
12488 "get vectype for scalar type: %T\n", scalar_type
);
12490 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12492 return opt_result::failure_at (stmt
,
12494 " unsupported data-type %T\n",
12497 if (dump_enabled_p ())
12498 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12501 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
12502 return opt_result::failure_at (stmt
,
12503 "not vectorized: vector stmt in loop:%G",
12506 *stmt_vectype_out
= vectype
;
12508 /* Don't try to compute scalar types if the stmt produces a boolean
12509 vector; use the existing vector type instead. */
12510 tree nunits_vectype
= vectype
;
12511 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12513 /* The number of units is set according to the smallest scalar
12514 type (or the largest vector size, but we only support one
12515 vector size per vectorization). */
12516 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12517 TREE_TYPE (vectype
));
12518 if (scalar_type
!= TREE_TYPE (vectype
))
12520 if (dump_enabled_p ())
12521 dump_printf_loc (MSG_NOTE
, vect_location
,
12522 "get vectype for smallest scalar type: %T\n",
12524 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12526 if (!nunits_vectype
)
12527 return opt_result::failure_at
12528 (stmt
, "not vectorized: unsupported data-type %T\n",
12530 if (dump_enabled_p ())
12531 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12536 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12537 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
12538 return opt_result::failure_at (stmt
,
12539 "Not vectorized: Incompatible number "
12540 "of vector subparts between %T and %T\n",
12541 nunits_vectype
, *stmt_vectype_out
);
12543 if (dump_enabled_p ())
12545 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12546 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12547 dump_printf (MSG_NOTE
, "\n");
12550 *nunits_vectype_out
= nunits_vectype
;
12551 return opt_result::success ();
12554 /* Generate and return statement sequence that sets vector length LEN that is:
12556 min_of_start_and_end = min (START_INDEX, END_INDEX);
12557 left_len = END_INDEX - min_of_start_and_end;
12558 rhs = min (left_len, LEN_LIMIT);
12561 Note: the cost of the code generated by this function is modeled
12562 by vect_estimate_min_profitable_iters, so changes here may need
12563 corresponding changes there. */
12566 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
12568 gimple_seq stmts
= NULL
;
12569 tree len_type
= TREE_TYPE (len
);
12570 gcc_assert (TREE_TYPE (start_index
) == len_type
);
12572 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
12573 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
12574 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
12575 gimple
* stmt
= gimple_build_assign (len
, rhs
);
12576 gimple_seq_add_stmt (&stmts
, stmt
);