/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  stmt_vec_info stmt_info, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
              || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
                           NULL_TREE, 0, where);
}
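
/* Illustrative usage sketch (not from the original sources; STMT_INFO and
   VECTYPE stand for values obtained during analysis):

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned body = record_stmt_cost (&cost_vec, 1, vector_stmt,
				       stmt_info, vectype, 0, vect_body);
     unsigned branch = record_stmt_cost (&cost_vec, 1, cond_branch_taken,
					 vect_epilogue);

   The returned values are only preliminary estimates based on
   builtin_vectorization_cost; the vector of stmt_info_for_cost entries is
   what is later handed to the target cost model for the final decision.  */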

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vec_stmt_relevant_p: forcing live pattern stmt "
                             "relevant.\n");
          relevant = vect_used_only_live;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "mark relevant %d, live %d: %G", relevant, live_p,
                         stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).
   - it is an induction and we have multiple exits.

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  if (is_ctrl_stmt (stmt)
      && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
      && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);

              *live_p = true;
            }
        }
    }

  /* Check if it's an induction and multiple exits.  In this case there will be
     a usage later on after peeling which is needed for the alternate exit.  */
  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
      && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: induction forced for "
                         "early break.\n");
      *live_p = true;
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
            {
              tree_code rhs_code = gimple_cond_code (cond);
              gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
              opt_result res
                = process_use (stmt_vinfo, gimple_cond_lhs (cond),
                               loop_vinfo, relevant, &worklist, false);
              if (!res)
                return res;
              res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
                                 loop_vinfo, relevant, &worklist, false);
              if (!res)
                return res;
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
          else
            gcc_unreachable ();
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }

    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
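
/* Illustrative sketch (hypothetical operand and type names): when the
   scalar operand of "x + 3" is vectorized with a V4SI vector type,

     tree vop = vect_init_vector (vinfo, stmt_info,
				  build_int_cst (integer_type_node, 3),
				  v4si_type, gsi);

   emits an init stmt of the form "cst_N = { 3, 3, 3, 3 };" (at *GSI, or in
   the loop preheader when GSI is NULL) and returns the new SSA name.  */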

/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}

/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, tree vectype0, vec<tree> *vec_oprnds0,
                   tree op1, tree vectype1, vec<tree> *vec_oprnds1,
                   tree op2, tree vectype2, vec<tree> *vec_oprnds2,
                   tree op3, tree vectype3, vec<tree> *vec_oprnds3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, NULL_TREE, vec_oprnds0,
                     op1, NULL_TREE, vec_oprnds1,
                     op2, NULL_TREE, vec_oprnds2,
                     op3, NULL_TREE, vec_oprnds3);
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && (!(gimple_call_flags (vec_stmt)
                            & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
                          || (gimple_call_lhs (vec_stmt)
                              && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);

          /* The type size of both the vectype_in and vectype_out should be
             exactly the same when vectype_out isn't participating in the
             optab.  There is no restriction on the type size when vectype_out
             is part of the optab query.  */
          if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
            return IFN_LAST;

          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}

static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      slp_tree slp_node,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                      memory_access_type,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      internal_fn ifn
        = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
                   : vect_store_lanes_supported (vectype, group_size, true));
      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
                             ? IFN_MASK_LEN_GATHER_LOAD
                             : IFN_MASK_LEN_SCATTER_STORE);
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                  gs_info->memory_type,
                                                  gs_info->offset_vectype,
                                                  gs_info->scale))
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                       gs_info->memory_type,
                                                       gs_info->offset_vectype,
                                                       gs_info->scale))
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
           && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because the"
                         " target doesn't have the appropriate partial"
                         " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}

/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
                  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
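
/* Illustrative sketch (hypothetical operands): given a loop control mask and
   the vectorized condition VEC_MASK of a conditional load, a caller would
   guard the access with

     tree mask = prepare_vec_mask (loop_vinfo, mask_type, loop_mask,
				   vec_mask, gsi);

   which emits "vec_mask_and_N = vec_mask & loop_mask;" before *GSI unless
   LOOP_MASK is null or that particular combination was already computed.  */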

/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
        continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
                                                         sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
         no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype)
          || gs_info->ifn == IFN_LAST)
        continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "truncating gather/scatter offset to %d bits"
                     " might change its value.\n", element_bits);

  return false;
}
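
/* Worked example (hypothetical numbers): for DR_STEP = 4 and COUNT = 255,
   scale 1 gives FACTOR = 4 and a range of 1020, so a 16-bit offset type is
   tried; scale 4 gives FACTOR = 1 and a range of 255, which fits an 8-bit
   unsigned offset.  The first scale for which the target supports the
   gather/scatter is the one that gets recorded in GS_INFO.  */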

/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   load or store.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
              >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}

/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
                             indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
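
/* For example, for a 4-element vector the three encoded elements are
   { 3, 2, 1 } and the series continues downwards, giving the reversal
   mask { 3, 2, 1, 0 }.  */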
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */

static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies, poly_int64 *poffset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
	      * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
  alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
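/* In summary: an invariant stored value can simply be stored "downwards"
   (VMAT_CONTIGUOUS_DOWN), a reversible vector becomes a contiguous access
   followed by a reverse permute (VMAT_CONTIGUOUS_REVERSE), and anything
   else falls back to VMAT_ELEMENTWISE.  */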
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

static tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
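/* For a plain assignment the stored value is the single RHS; for an
   internal-function store such as .MASK_STORE the stored value operand is
   found via internal_fn_stored_value_index (for .MASK_STORE that is the
   value following the pointer, alignment and mask arguments).  */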
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   and has the same vector size as the returned vector.  It first checks
   whether the target supports a vector mode of the piece size for the
   construction; if not, it then checks whether a scalar mode of the piece
   size can be used instead.  It returns NULL_TREE if no usable composition
   can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  /* When we are asked to compose the vector from its components let
     that happen directly.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
    {
      *ptype = TREE_TYPE (vtype);
      return vtype;
    }

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if there exists an integer type of the same piece
	 size and if vec_init optab supports construction from it
	 directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
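/* A typical use is the half-vector case in get_group_load_store_type below:
   it asks for a composition of the vector type from two pieces
   (vector_vector_composition_type (vectype, 2, &half_vtype)) so that a
   group whose gap covers the upper half can be loaded as a single piece.  */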
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
			   tree vectype, slp_tree slp_node,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type,
			   poly_int64 *poffset,
			   dr_alignment_support *alignment_support_scheme,
			   int *misalignment,
			   gather_scatter_info *gs_info,
			   internal_fn *lanes_ifn)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  stmt_vec_info first_stmt_info;
  unsigned int group_size;
  unsigned HOST_WIDE_INT gap;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      gap = DR_GROUP_GAP (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      group_size = 1;
      gap = 0;
    }
  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  bool single_element_p = (stmt_info == first_stmt_info
			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
			&& vls_type == VLS_LOAD
			&& loop_vinfo
			&& !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2040 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 if (multiple_p (nunits
, group_size
))
2044 *memory_access_type
= VMAT_STRIDED_SLP
;
2046 *memory_access_type
= VMAT_ELEMENTWISE
;
2050 overrun_p
= loop_vinfo
&& gap
!= 0;
2051 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2054 "Grouped store with gaps requires"
2055 " non-consecutive accesses\n");
2058 /* An overrun is fine if the trailing elements are smaller
2059 than the alignment boundary B. Every vector access will
2060 be a multiple of B and so we are guaranteed to access a
2061 non-gap element in the same B-sized block. */
2063 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2065 / vect_get_scalar_dr_size (first_dr_info
)))
2068 /* If the gap splits the vector in half and the target
2069 can do half-vector operations avoid the epilogue peeling
2070 by simply loading half of the vector only. Usually
2071 the construction with an upper zero half will be elided. */
2072 dr_alignment_support alss
;
2073 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2077 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2078 vectype
, misalign
)))
2080 || alss
== dr_unaligned_supported
)
2081 && known_eq (nunits
, (group_size
- gap
) * 2)
2082 && known_eq (nunits
, group_size
)
2083 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2087 if (overrun_p
&& !can_overrun_p
)
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2091 "Peeling for outer loop is not supported\n");
2094 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2097 if (single_element_p
)
2098 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2099 only correct for single element "interleaving" SLP. */
2100 *memory_access_type
= get_negative_load_store_type
2101 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (nunits
, group_size
))
2108 *memory_access_type
= VMAT_STRIDED_SLP
;
2110 *memory_access_type
= VMAT_ELEMENTWISE
;
2113 else if (cmp
== 0 && loop_vinfo
)
2115 gcc_assert (vls_type
== VLS_LOAD
);
2116 *memory_access_type
= VMAT_INVARIANT
;
2117 /* Invariant accesses perform only component accesses, alignment
2118 is irrelevant for them. */
2119 *alignment_support_scheme
= dr_unaligned_supported
;
2122 *memory_access_type
= VMAT_CONTIGUOUS
;
2124 /* When we have a contiguous access across loop iterations
2125 but the access in the loop doesn't cover the full vector
2126 we can end up with no gap recorded but still excess
2127 elements accessed, see PR103116. Make sure we peel for
2128 gaps if necessary and sufficient and give up if not.
2130 If there is a combination of the access not covering the full
2131 vector and a gap recorded then we may need to peel twice. */
2133 && *memory_access_type
== VMAT_CONTIGUOUS
2134 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2135 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2138 unsigned HOST_WIDE_INT cnunits
, cvf
;
2140 || !nunits
.is_constant (&cnunits
)
2141 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2142 /* Peeling for gaps assumes that a single scalar iteration
2143 is enough to make sure the last vector iteration doesn't
2144 access excess elements.
2145 ??? Enhancements include peeling multiple iterations
2146 or using masked loads with a static mask. */
2147 || (group_size
* cvf
) % cnunits
+ group_size
- gap
< cnunits
)
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2151 "peeling for gaps insufficient for "
2161 /* We can always handle this case using elementwise accesses,
2162 but see if something more efficient is available. */
2163 *memory_access_type
= VMAT_ELEMENTWISE
;
2165 /* If there is a gap at the end of the group then these optimizations
2166 would access excess elements in the last iteration. */
2167 bool would_overrun_p
= (gap
!= 0);
2168 /* An overrun is fine if the trailing elements are smaller than the
2169 alignment boundary B. Every vector access will be a multiple of B
2170 and so we are guaranteed to access a non-gap element in the
2171 same B-sized block. */
2174 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2175 / vect_get_scalar_dr_size (first_dr_info
)))
2176 would_overrun_p
= false;
2178 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2179 && (can_overrun_p
|| !would_overrun_p
)
2180 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2182 /* First cope with the degenerate case of a single-element
2184 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2189 /* Otherwise try using LOAD/STORE_LANES. */
2191 = vls_type
== VLS_LOAD
2192 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2193 : vect_store_lanes_supported (vectype
, group_size
,
2195 if (*lanes_ifn
!= IFN_LAST
)
2197 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2198 overrun_p
= would_overrun_p
;
2201 /* If that fails, try using permuting loads. */
2202 else if (vls_type
== VLS_LOAD
2203 ? vect_grouped_load_supported (vectype
,
2206 : vect_grouped_store_supported (vectype
, group_size
))
2208 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2209 overrun_p
= would_overrun_p
;
2214 /* As a last resort, trying using a gather load or scatter store.
2216 ??? Although the code can handle all group sizes correctly,
2217 it probably isn't a win to use separate strided accesses based
2218 on nearby locations. Or, even if it's a win over scalar code,
2219 it might not be a win over vectorizing at a lower VF, if that
2220 allows us to use contiguous accesses. */
2221 if (*memory_access_type
== VMAT_ELEMENTWISE
2224 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2226 *memory_access_type
= VMAT_GATHER_SCATTER
;
2229 if (*memory_access_type
== VMAT_GATHER_SCATTER
2230 || *memory_access_type
== VMAT_ELEMENTWISE
)
2232 *alignment_support_scheme
= dr_unaligned_supported
;
2233 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2237 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2238 *alignment_support_scheme
2239 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2243 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2245 /* STMT is the leader of the group. Check the operands of all the
2246 stmts of the group. */
2247 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2248 while (next_stmt_info
)
2250 tree op
= vect_get_store_rhs (next_stmt_info
);
2251 enum vect_def_type dt
;
2252 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2256 "use not simple.\n");
2259 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2265 gcc_assert (can_overrun_p
);
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2268 "Data access with gaps requires scalar "
2270 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
		     tree vectype, slp_tree slp_node,
		     bool masked_p, vec_load_store_type vls_type,
		     unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     poly_int64 *poffset,
		     dr_alignment_support *alignment_support_scheme,
		     int *misalignment,
		     gather_scatter_info *gs_info,
		     internal_fn *lanes_ifn)
{
2302 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2303 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2304 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2308 *memory_access_type
= VMAT_GATHER_SCATTER
;
2309 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2311 /* When using internal functions, we rely on pattern recognition
2312 to convert the type of the offset to the type that the target
2313 requires, with the result being a call to an internal function.
2314 If that failed for some reason (e.g. because another pattern
2315 took priority), just handle cases in which the offset already
2316 has the right type. */
2317 else if (gs_info
->ifn
!= IFN_LAST
2318 && !is_gimple_call (stmt_info
->stmt
)
2319 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2320 TREE_TYPE (gs_info
->offset_vectype
)))
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2324 "%s offset requires a conversion\n",
2325 vls_type
== VLS_LOAD
? "gather" : "scatter");
2328 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2329 &gs_info
->offset_dt
,
2330 &gs_info
->offset_vectype
))
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2334 "%s index use not simple.\n",
2335 vls_type
== VLS_LOAD
? "gather" : "scatter");
2338 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2340 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2341 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2342 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2343 (gs_info
->offset_vectype
),
2344 TYPE_VECTOR_SUBPARTS (vectype
)))
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2348 "unsupported vector types for emulated "
2353 /* Gather-scatter accesses perform only component accesses, alignment
2354 is irrelevant for them. */
2355 *alignment_support_scheme
= dr_unaligned_supported
;
2357 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) || slp_node
)
2359 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2361 vls_type
, memory_access_type
, poffset
,
2362 alignment_support_scheme
,
2363 misalignment
, gs_info
, lanes_ifn
))
2366 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2368 gcc_assert (!slp_node
);
2370 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2372 *memory_access_type
= VMAT_GATHER_SCATTER
;
2374 *memory_access_type
= VMAT_ELEMENTWISE
;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme
= dr_unaligned_supported
;
2380 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2383 gcc_assert (vls_type
== VLS_LOAD
);
2384 *memory_access_type
= VMAT_INVARIANT
;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme
= dr_unaligned_supported
;
2392 *memory_access_type
= get_negative_load_store_type
2393 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2395 *memory_access_type
= VMAT_CONTIGUOUS
;
2396 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2398 *alignment_support_scheme
2399 = vect_supportable_dr_alignment (vinfo
,
2400 STMT_VINFO_DR_INFO (stmt_info
),
2401 vectype
, *misalignment
);
2405 if ((*memory_access_type
== VMAT_ELEMENTWISE
2406 || *memory_access_type
== VMAT_STRIDED_SLP
)
2407 && !nunits
.is_constant ())
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2411 "Not using elementwise accesses due to variable "
2412 "vectorization factor.\n");
2416 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2420 "unsupported unaligned access\n");
2424 /* FIXME: At the moment the cost model seems to underestimate the
2425 cost of using elementwise accesses. This check preserves the
2426 traditional behavior until that can be fixed. */
2427 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2428 if (!first_stmt_info
)
2429 first_stmt_info
= stmt_info
;
2430 if (*memory_access_type
== VMAT_ELEMENTWISE
2431 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2432 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2433 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2434 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2438 "not falling back to elementwise accesses\n");
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */

static bool
vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
			slp_tree slp_node, unsigned mask_index,
			tree *mask, slp_tree *mask_node,
			vect_def_type *mask_dt_out, tree *mask_vectype_out)
{
  enum vect_def_type mask_dt;
  tree mask_vectype;
  slp_tree mask_node_1;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
			   mask, &mask_node_1, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  /* If the caller is not prepared for adjusting an external/constant
     SLP mask vector type fail.  */
  if (slp_node
      && !mask_node
      && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "SLP mask argument is not vectorized.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
						  mask_node_1);

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "vector mask type %T"
			 " does not match vector data type %T.\n",
			 mask_vectype, vectype);
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  if (mask_node)
    *mask_node = mask_node_1;
  return true;
}
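/* For a conditional load such as .MASK_LOAD (ptr, align, mask) the caller
   passes the index of the mask argument; the checks above require the mask
   to be a scalar boolean whose vectorized type has exactly one element per
   data element.  */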
/* Return true if stored value is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the scalar stored
   in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
   the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
		      slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  int op_no = 0;
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      if (gimple_call_internal_p (call)
	  && internal_store_fn_p (gimple_call_internal_fn (call)))
	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
    }
  if (slp_node)
    op_no = vect_slp_child_index_for_operand
	      (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
			   rhs, rhs_node, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
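/* For example, storing the constant 0 in every iteration yields
   VLS_STORE_INVARIANT: the stored vector is loop-invariant and can be
   materialized once, whereas a value defined inside the loop yields
   VLS_STORE.  */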
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (vec_info *vinfo,
			  stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (VECTOR_BOOLEAN_TYPE_P (masktype)
	   || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
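/* The floating-point case matters for gather/scatter builtins whose mask
   operand has the same mode as the data (e.g. a float mask for a float
   gather): going through real_from_target reproduces the all-ones bit
   pattern exactly rather than building the value -1.0.  */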
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (vec_info *vinfo,
				stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
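/* The merge value supplies the lanes of a masked gather result whose mask
   bit is clear; an all-zero vector is a safe default when the caller has no
   previous value to pass through.  */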
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   vectorized condition, otherwise MASK is null.  PTR is the base
   pointer and OFFSET is the vectorized offset.  */

static gimple *
vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
				 gimple_stmt_iterator *gsi,
				 gather_scatter_info *gs_info,
				 tree ptr, tree offset, tree mask)
{
2650 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2651 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2652 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2653 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2654 /* ptrtype */ arglist
= TREE_CHAIN (arglist
);
2655 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2656 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2657 tree scaletype
= TREE_VALUE (arglist
);
2659 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2661 || TREE_CODE (masktype
) == INTEGER_TYPE
2662 || types_compatible_p (srctype
, masktype
)));
2665 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2667 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2668 TYPE_VECTOR_SUBPARTS (idxtype
)));
2669 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2670 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2671 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2672 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2676 tree src_op
= NULL_TREE
;
2677 tree mask_op
= NULL_TREE
;
2680 if (!useless_type_conversion_p (masktype
, TREE_TYPE (mask
)))
2682 tree utype
, optype
= TREE_TYPE (mask
);
2683 if (VECTOR_TYPE_P (masktype
)
2684 || TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2687 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2688 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2689 tree mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask
);
2691 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2692 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2694 if (!useless_type_conversion_p (masktype
, utype
))
2696 gcc_assert (TYPE_PRECISION (utype
)
2697 <= TYPE_PRECISION (masktype
));
2698 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2699 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2700 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2703 src_op
= build_zero_cst (srctype
);
2714 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2715 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2718 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2719 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2722 if (!useless_type_conversion_p (vectype
, rettype
))
2724 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2725 TYPE_VECTOR_SUBPARTS (rettype
)));
2726 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2727 gimple_call_set_lhs (new_stmt
, op
);
2728 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2729 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2730 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
, op
);
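/* The builtin gather decl used here (targets such as x86 expose gathers
   this way) takes five operands in the order (src, ptr, index, mask, scale);
   the VIEW_CONVERTs above exist only to reconcile the statement's vector
   types with the builtin's argument and return types.  */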
/* Build a scatter store call while vectorizing STMT_INFO.  Insert new
   instructions before GSI.  GS_INFO describes the scatter store operation.
   PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
   vectorized data to store.
   If the store is conditional, MASK is the vectorized condition, otherwise
   MASK is null.  */

static gimple *
vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
				   gimple_stmt_iterator *gsi,
				   gather_scatter_info *gs_info,
				   tree ptr, tree offset, tree oprnd, tree mask)
{
2749 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2750 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2751 /* tree ptrtype = TREE_VALUE (arglist); */ arglist
= TREE_CHAIN (arglist
);
2752 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2753 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2754 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2755 tree scaletype
= TREE_VALUE (arglist
);
2756 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
2757 && TREE_CODE (rettype
) == VOID_TYPE
);
2759 tree mask_arg
= NULL_TREE
;
2763 tree optype
= TREE_TYPE (mask_arg
);
2765 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2768 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2769 tree var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2770 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
2772 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2773 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2775 if (!useless_type_conversion_p (masktype
, utype
))
2777 gcc_assert (TYPE_PRECISION (utype
) <= TYPE_PRECISION (masktype
));
2778 tree var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2779 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2780 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2786 mask_arg
= build_int_cst (masktype
, -1);
2787 mask_arg
= vect_init_vector (vinfo
, stmt_info
, mask_arg
, masktype
, NULL
);
2791 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
2793 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
2794 TYPE_VECTOR_SUBPARTS (srctype
)));
2795 tree var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
2796 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
2797 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
2798 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2803 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2806 TYPE_VECTOR_SUBPARTS (idxtype
)));
2807 tree var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2808 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2809 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2810 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2814 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2816 = gimple_build_call (gs_info
->decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
			     class loop *loop, stmt_vec_info stmt_info,
			     slp_tree slp_node, gather_scatter_info *gs_info,
			     tree *dataref_ptr, vec<tree> *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  if (slp_node)
    vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
  else
    {
      unsigned ncopies
	= vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
      vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
				     gs_info->offset, vec_offset,
				     gs_info->offset_vectype);
    }
}
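/* E.g. for a gathered access a[b[i]] the base is &a, gimplified once on the
   preheader edge because it is loop-invariant, while the vectorized offset
   defs are the vector values of b[i], one per copy.  */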
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gimple_stmt_iterator *gsi,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset,
				 vec_loop_lens *loop_lens)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    {
      /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
	 ivtmp_8 = _31 * 16 (step in bytes);
	 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
	 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
      tree loop_len
	= vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
      tree tmp
	= fold_build2 (MULT_EXPR, sizetype,
		       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		       loop_len);
      *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
						GSI_SAME_STMT);
    }
  else
    {
      tree bump
	= size_binop (MULT_EXPR,
		      fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		      size_int (TYPE_VECTOR_SUBPARTS (vectype)));
      *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
    }

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
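/* Worked example: for a stride of 16 bytes and SCALE 4, X is 4, so
   *VEC_OFFSET becomes { 0, 4, 8, 12, ... } while *DATAREF_BUMP is either
   16 * nunits (fixed length) or 16 * .SELECT_VL (variable length).  */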
/* Prepare a pointer IV that needs to be updated by a variable amount.
   The variable amount is the outcome of .SELECT_VL.  In this case each
   iteration may process a flexible number of elements, as long as that
   number is <= VF elements.

   Return the data-reference bump computed from SELECT_VL.
   If new statements are needed, insert them before GSI.  */

static tree
vect_get_loop_variant_data_ptr_increment (
  vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
  vec_loop_lens *loop_lens, dr_vec_info *dr_info,
  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;

  /* gather/scatter never reach here.  */
  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);

  /* When the SELECT_VL pattern is in use, the memory address is adjusted
     dynamically by the .SELECT_VL result.

     The result of .SELECT_VL is the number of elements to be processed
     in each iteration, so the address adjustment is:

       addr = addr + .SELECT_VL (ARG..) * step;  */
  tree loop_len
    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
  tree len_type = TREE_TYPE (loop_len);
  /* Since the outcome of .SELECT_VL is an element count, scale it by the
     step in bytes before using it to bump the pointer IV.  */
  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
			  wide_int_to_tree (len_type, wi::to_widest (step)));
  tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
  gassign *assign = gimple_build_assign (bump, tmp);
  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
  return bump;
}
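/* For instance, if .SELECT_VL returns 5 for this iteration and the scalar
   step is 8 bytes, the bump computed above is 40 and the pointer IV
   advances by exactly the 5 elements that were processed.  */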
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type,
			     vec_loop_lens *loop_lens = nullptr)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
						     loop_lens, dr_info,
						     memory_access_type);

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
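/* For contiguous forward accesses this is simply the size of AGGR_TYPE;
   for negative-step (reversed) accesses the increment is negated so the
   pointer walks backwards through memory.  */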
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */

static bool
vectorizable_bswap (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    slp_tree *slp_op,
		    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
2992 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2993 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2996 op
= gimple_call_arg (stmt
, 0);
2997 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2998 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3000 /* Multiple types in SLP are handled by creating the appropriate number of
3001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3006 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3008 gcc_assert (ncopies
>= 1);
3010 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype
))
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3014 "mismatched vector sizes %T and %T\n",
3015 vectype_in
, vectype
);
3019 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3023 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3024 unsigned word_bytes
;
3025 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3028 /* The encoding uses one stepped pattern for each byte in the word. */
3029 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3030 for (unsigned i
= 0; i
< 3; ++i
)
3031 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3032 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3034 vec_perm_indices
indices (elts
, 1, num_bytes
);
3035 machine_mode vmode
= TYPE_MODE (char_vectype
);
3036 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3042 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3046 "incompatible vector types for invariants\n");
3050 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3051 DUMP_VECT_SCOPE ("vectorizable_bswap");
3052 record_stmt_cost (cost_vec
,
3053 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3054 record_stmt_cost (cost_vec
,
3056 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3057 vec_perm
, stmt_info
, 0, vect_body
);
3061 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3064 vec
<tree
> vec_oprnds
= vNULL
;
3065 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3067 /* Arguments are ready. create the new vector stmt. */
3070 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3073 tree tem
= make_ssa_name (char_vectype
);
3074 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3075 char_vectype
, vop
));
3076 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3077 tree tem2
= make_ssa_name (char_vectype
);
3078 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3079 tem
, tem
, bswap_vconst
);
3080 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3081 tem
= make_ssa_name (vectype
);
3082 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3084 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3086 slp_node
->push_vec_def (new_stmt
);
3088 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3092 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3094 vec_oprnds
.release ();
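/* E.g. a BSWAP32 over V4SI is done as a V16QI permute with the selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the bytes
   of each 4-byte word reversed in place.  */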
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  code_helper *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  code_helper code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
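/* For example, narrowing V4SI results to a V8HI output is a single
   vector-pack step, so this returns true with that pack code; anything
   needing an intermediate type (multi_step_cvt != 0) is rejected.  */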
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_call (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
3140 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3141 tree vectype_out
, vectype_in
;
3142 poly_uint64 nunits_in
;
3143 poly_uint64 nunits_out
;
3144 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3145 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3146 tree fndecl
, new_temp
, rhs_type
;
3147 enum vect_def_type dt
[4]
3148 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3149 vect_unknown_def_type
};
3150 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3151 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3152 int ndts
= ARRAY_SIZE (dt
);
3154 auto_vec
<tree
, 8> vargs
;
3155 enum { NARROW
, NONE
, WIDEN
} modifier
;
3158 tree clz_ctz_arg1
= NULL_TREE
;
3160 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3163 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3167 /* Is STMT_INFO a vectorizable call? */
3168 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3172 if (gimple_call_internal_p (stmt
)
3173 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3174 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3175 /* Handled by vectorizable_load and vectorizable_store. */
3178 if (gimple_call_lhs (stmt
) == NULL_TREE
3179 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3182 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3184 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3186 /* Process function arguments. */
3187 rhs_type
= NULL_TREE
;
3188 vectype_in
= NULL_TREE
;
3189 nargs
= gimple_call_num_args (stmt
);
3191 /* Bail out if the function has more than four arguments, we do not have
3192 interesting builtin functions to vectorize with more than two arguments
3193 except for fma. No arguments is also not good. */
3194 if (nargs
== 0 || nargs
> 4)
3197 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3198 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3199 if (cfn
== CFN_GOMP_SIMD_LANE
)
3202 rhs_type
= unsigned_type_node
;
3204 /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
3205 argument just says whether it is well-defined at zero or not and what
3206 value should be returned for it. */
3207 if ((cfn
== CFN_CLZ
|| cfn
== CFN_CTZ
) && nargs
== 2)
3210 clz_ctz_arg1
= gimple_call_arg (stmt
, 1);
3214 if (internal_fn_p (cfn
))
3215 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3217 for (i
= 0; i
< nargs
; i
++)
3219 if ((int) i
== mask_opno
)
3221 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3222 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3227 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3228 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3232 "use not simple.\n");
3236 /* We can only handle calls with arguments of the same type. */
3238 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3242 "argument types differ.\n");
3246 rhs_type
= TREE_TYPE (op
);
3249 vectype_in
= vectypes
[i
];
3250 else if (vectypes
[i
]
3251 && !types_compatible_p (vectypes
[i
], vectype_in
))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3255 "argument vector types differ.\n");
3259 /* If all arguments are external or constant defs, infer the vector type
3260 from the scalar type. */
3262 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3264 gcc_assert (vectype_in
);
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3269 "no vectype for scalar type %T\n", rhs_type
);
3274 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3275 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3279 "mixed mask and nonmask vector types\n");
3283 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "use emulated vector type for call\n");
3292 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3293 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3294 if (known_eq (nunits_in
* 2, nunits_out
))
3296 else if (known_eq (nunits_out
, nunits_in
))
3298 else if (known_eq (nunits_out
* 2, nunits_in
))
3303 /* We only handle functions that do not read or clobber memory. */
3304 if (gimple_vuse (stmt
))
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3308 "function reads from or writes to memory.\n");
3312 /* For now, we only vectorize functions if a target specific builtin
3313 is available. TODO -- in some cases, it might be profitable to
3314 insert the calls for pieces of the vector, in order to be able
3315 to vectorize other operations in the loop. */
3317 internal_fn ifn
= IFN_LAST
;
3318 tree callee
= gimple_call_fndecl (stmt
);
3320 /* First try using an internal function. */
3321 code_helper convert_code
= MAX_TREE_CODES
;
3323 && (modifier
== NONE
3324 || (modifier
== NARROW
3325 && simple_integer_narrowing (vectype_out
, vectype_in
,
3327 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3330 /* If that fails, try asking for a target-specific built-in function. */
3331 if (ifn
== IFN_LAST
)
3333 if (cfn
!= CFN_LAST
)
3334 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3335 (cfn
, vectype_out
, vectype_in
);
3336 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3337 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3338 (callee
, vectype_out
, vectype_in
);
3341 if (ifn
== IFN_LAST
&& !fndecl
)
3343 if (cfn
== CFN_GOMP_SIMD_LANE
3346 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3347 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3348 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3349 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3352 { 0, 1, 2, ... vf - 1 } vector. */
3353 gcc_assert (nargs
== 0);
3355 else if (modifier
== NONE
3356 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3357 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3358 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3359 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3360 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3361 slp_op
, vectype_in
, cost_vec
);
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3366 "function is not vectorizable.\n");
3373 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3374 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3376 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3378 /* Sanity check: make sure that at least one copy of the vectorized stmt
3379 needs to be generated. */
3380 gcc_assert (ncopies
>= 1);
3382 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3383 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3384 internal_fn cond_len_fn
= get_len_internal_fn (ifn
);
3385 int len_opno
= internal_fn_len_index (cond_len_fn
);
3386 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3387 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
3388 if (!vec_stmt
) /* transformation not required. */
3391 for (i
= 0; i
< nargs
; ++i
)
3392 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3394 ? vectypes
[i
] : vectype_in
))
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3398 "incompatible vector types for invariants\n");
3401 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3402 DUMP_VECT_SCOPE ("vectorizable_call");
3403 vect_model_simple_cost (vinfo
, stmt_info
,
3404 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3405 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3406 record_stmt_cost (cost_vec
, ncopies
/ 2,
3407 vec_promote_demote
, stmt_info
, 0, vect_body
);
3410 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3411 && (reduc_idx
>= 0 || mask_opno
>= 0))
3414 && (cond_fn
== IFN_LAST
3415 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3416 OPTIMIZE_FOR_SPEED
))
3417 && (cond_len_fn
== IFN_LAST
3418 || !direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3419 OPTIMIZE_FOR_SPEED
)))
3421 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3423 "can't use a fully-masked loop because no"
3424 " conditional operation is available.\n");
3425 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3429 unsigned int nvectors
3431 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3433 tree scalar_mask
= NULL_TREE
;
3435 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3436 if (cond_len_fn
!= IFN_LAST
3437 && direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3438 OPTIMIZE_FOR_SPEED
))
3439 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype_out
,
3442 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
,
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3455 scalar_dest
= gimple_call_lhs (stmt
);
3456 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3458 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3459 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
3460 unsigned int vect_nargs
= nargs
;
3466 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3469 else if (reduc_idx
>= 0)
3472 else if (masked_loop_p
&& reduc_idx
>= 0)
3480 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3482 tree prev_res
= NULL_TREE
;
3483 vargs
.safe_grow (vect_nargs
, true);
3484 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3485 for (j
= 0; j
< ncopies
; ++j
)
3487 /* Build argument list for the vectorized call. */
3490 vec
<tree
> vec_oprnds0
;
3492 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3493 vec_oprnds0
= vec_defs
[0];
3495 /* Arguments are ready. Create the new vector stmt. */
3496 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3499 if (masked_loop_p
&& reduc_idx
>= 0)
3501 unsigned int vec_num
= vec_oprnds0
.length ();
3502 /* Always true for SLP. */
3503 gcc_assert (ncopies
== 1);
3504 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3505 gsi
, masks
, vec_num
,
3509 for (k
= 0; k
< nargs
; k
++)
3511 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3512 vargs
[varg
++] = vec_oprndsk
[i
];
3514 if (masked_loop_p
&& reduc_idx
>= 0)
3515 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3517 vargs
[varg
++] = clz_ctz_arg1
;
3520 if (modifier
== NARROW
)
3522 /* We don't define any narrowing conditional functions
3524 gcc_assert (mask_opno
< 0);
3525 tree half_res
= make_ssa_name (vectype_in
);
3527 = gimple_build_call_internal_vec (ifn
, vargs
);
3528 gimple_call_set_lhs (call
, half_res
);
3529 gimple_call_set_nothrow (call
, true);
3530 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3533 prev_res
= half_res
;
3536 new_temp
= make_ssa_name (vec_dest
);
3537 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3538 prev_res
, half_res
);
3539 vect_finish_stmt_generation (vinfo
, stmt_info
,
3544 if (len_opno
>= 0 && len_loop_p
)
3546 unsigned int vec_num
= vec_oprnds0
.length ();
3547 /* Always true for SLP. */
3548 gcc_assert (ncopies
== 1);
3550 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3553 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3554 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3555 vargs
[len_opno
] = len
;
3556 vargs
[len_opno
+ 1] = bias
;
3558 else if (mask_opno
>= 0 && masked_loop_p
)
3560 unsigned int vec_num
= vec_oprnds0
.length ();
3561 /* Always true for SLP. */
3562 gcc_assert (ncopies
== 1);
3563 tree mask
= vect_get_loop_mask (loop_vinfo
,
3564 gsi
, masks
, vec_num
,
3566 vargs
[mask_opno
] = prepare_vec_mask
3567 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3568 vargs
[mask_opno
], gsi
);
3572 if (ifn
!= IFN_LAST
)
3573 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3575 call
= gimple_build_call_vec (fndecl
, vargs
);
3576 new_temp
= make_ssa_name (vec_dest
, call
);
3577 gimple_call_set_lhs (call
, new_temp
);
3578 gimple_call_set_nothrow (call
, true);
3579 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3582 slp_node
->push_vec_def (new_stmt
);
3588 if (masked_loop_p
&& reduc_idx
>= 0)
3589 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3591 for (i
= 0; i
< nargs
; i
++)
3593 op
= gimple_call_arg (stmt
, i
);
3596 vec_defs
.quick_push (vNULL
);
3597 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3601 vargs
[varg
++] = vec_defs
[i
][j
];
3603 if (masked_loop_p
&& reduc_idx
>= 0)
3604 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3606 vargs
[varg
++] = clz_ctz_arg1
;
3608 if (len_opno
>= 0 && len_loop_p
)
3610 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3613 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3614 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3615 vargs
[len_opno
] = len
;
3616 vargs
[len_opno
+ 1] = bias
;
3618 else if (mask_opno
>= 0 && masked_loop_p
)
3620 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3623 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3624 vargs
[mask_opno
], gsi
);
3628 if (cfn
== CFN_GOMP_SIMD_LANE
)
3630 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3632 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3633 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3634 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3635 new_temp
= make_ssa_name (vec_dest
);
3636 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3637 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3639 else if (modifier
== NARROW
)
3641 /* We don't define any narrowing conditional functions at
3643 gcc_assert (mask_opno
< 0);
3644 tree half_res
= make_ssa_name (vectype_in
);
3645 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3646 gimple_call_set_lhs (call
, half_res
);
3647 gimple_call_set_nothrow (call
, true);
3648 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3651 prev_res
= half_res
;
3654 new_temp
= make_ssa_name (vec_dest
);
3655 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
3657 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3662 if (ifn
!= IFN_LAST
)
3663 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3665 call
= gimple_build_call_vec (fndecl
, vargs
);
3666 new_temp
= make_ssa_name (vec_dest
, call
);
3667 gimple_call_set_lhs (call
, new_temp
);
3668 gimple_call_set_nothrow (call
, true);
3669 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3673 if (j
== (modifier
== NARROW
? 1 : 0))
3674 *vec_stmt
= new_stmt
;
3675 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3677 for (i
= 0; i
< nargs
; i
++)
3679 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3680 vec_oprndsi
.release ();
3683 else if (modifier
== NARROW
)
3685 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3686 /* We don't define any narrowing conditional functions at present. */
3687 gcc_assert (mask_opno
< 0);
3688 for (j
= 0; j
< ncopies
; ++j
)
3690 /* Build argument list for the vectorized call. */
3692 vargs
.create (nargs
* 2);
3698 vec
<tree
> vec_oprnds0
;
3700 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3701 vec_oprnds0
= vec_defs
[0];
3703 /* Arguments are ready. Create the new vector stmt. */
3704 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3708 for (k
= 0; k
< nargs
; k
++)
3710 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3711 vargs
.quick_push (vec_oprndsk
[i
]);
3712 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3715 if (ifn
!= IFN_LAST
)
3716 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3718 call
= gimple_build_call_vec (fndecl
, vargs
);
3719 new_temp
= make_ssa_name (vec_dest
, call
);
3720 gimple_call_set_lhs (call
, new_temp
);
3721 gimple_call_set_nothrow (call
, true);
3722 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3723 slp_node
->push_vec_def (call
);
3728 for (i
= 0; i
< nargs
; i
++)
3730 op
= gimple_call_arg (stmt
, i
);
3733 vec_defs
.quick_push (vNULL
);
3734 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3735 op
, &vec_defs
[i
], vectypes
[i
]);
3737 vec_oprnd0
= vec_defs
[i
][2*j
];
3738 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3740 vargs
.quick_push (vec_oprnd0
);
3741 vargs
.quick_push (vec_oprnd1
);
3744 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3745 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3746 gimple_call_set_lhs (new_stmt
, new_temp
);
3747 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3749 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3753 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3755 for (i
= 0; i
< nargs
; i
++)
3757 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3758 vec_oprndsi
.release ();
3762 /* No current target implements this case. */
3767 /* The call in STMT might prevent it from being removed in dce.
3768 We however cannot remove it here, due to the way the ssa name
3769 it defines is mapped to the new definition. So just replace
3770 rhs of the statement with something harmless. */
3775 stmt_info
= vect_orig_stmt (stmt_info
);
3776 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3779 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3780 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3786 struct simd_call_arg_info
3790 HOST_WIDE_INT linear_step
;
3791 enum vect_def_type dt
;
3793 bool simd_lane_linear
;
3796 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3797 is linear within simd lane (but not within whole loop), note it in *ARGINFO.  */
3801 vect_simd_lane_linear (tree op, class loop *loop,
3802 struct simd_call_arg_info *arginfo)
3804 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3806 if (!is_gimple_assign (def_stmt
)
3807 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3808 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3811 tree base
= gimple_assign_rhs1 (def_stmt
);
3812 HOST_WIDE_INT linear_step
= 0;
3813 tree v
= gimple_assign_rhs2 (def_stmt
);
3814 while (TREE_CODE (v
) == SSA_NAME
)
3817 def_stmt
= SSA_NAME_DEF_STMT (v
);
3818 if (is_gimple_assign (def_stmt
))
3819 switch (gimple_assign_rhs_code (def_stmt
))
3822 t
= gimple_assign_rhs2 (def_stmt
);
3823 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3825 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3826 v
= gimple_assign_rhs1 (def_stmt
);
3829 t
= gimple_assign_rhs2 (def_stmt
);
3830 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3832 linear_step
= tree_to_shwi (t
);
3833 v
= gimple_assign_rhs1 (def_stmt
);
3836 t
= gimple_assign_rhs1 (def_stmt
);
3837 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3838 || (TYPE_PRECISION (TREE_TYPE (v
))
3839 < TYPE_PRECISION (TREE_TYPE (t
))))
3848 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3850 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3851 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3856 arginfo
->linear_step
= linear_step
;
3858 arginfo
->simd_lane_linear
= true;
3864 /* Function vectorizable_simd_clone_call.
3866 Check if STMT_INFO performs a function call that can be vectorized
3867 by calling a simd clone of the function.
3868 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3869 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3870 Return true if STMT_INFO is vectorizable in this way. */
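/* Illustrative sketch (not part of GCC): what calling a SIMD clone means at
   the source level.  The clone name "foo_simd4", the vector type "v4si" and
   the assumption that N is a multiple of 4 are hypothetical; real clones are
   emitted by the simd-clone machinery and selected by the matching code
   below.  */

typedef int v4si __attribute__ ((vector_size (16)));

extern int foo (int x);              /* scalar function that has clones */
extern v4si foo_simd4 (v4si x);      /* hypothetical simdlen-4 clone     */

static void
scalar_loop (int *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = foo (b[i]);               /* one scalar call per iteration    */
}

static void
vectorized_loop (int *a, int *b, int n) /* assumes n % 4 == 0 */
{
  for (int i = 0; i < n; i += 4)     /* one clone call per 4 lanes       */
    *(v4si *) (a + i) = foo_simd4 (*(v4si *) (b + i));
}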
3873 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3874 gimple_stmt_iterator
*gsi
,
3875 gimple
**vec_stmt
, slp_tree slp_node
,
3876 stmt_vector_for_cost
*)
3881 tree vec_oprnd0
= NULL_TREE
;
3884 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3885 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3886 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3887 tree fndecl
, new_temp
;
3889 auto_vec
<simd_call_arg_info
> arginfo
;
3890 vec
<tree
> vargs
= vNULL
;
3892 tree lhs
, rtype
, ratype
;
3893 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3894 int masked_call_offset
= 0;
3896 /* Is STMT a vectorizable call? */
3897 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3901 fndecl
= gimple_call_fndecl (stmt
);
3902 if (fndecl
== NULL_TREE
3903 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
3905 fndecl
= gimple_call_arg (stmt
, 0);
3906 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
3907 fndecl
= TREE_OPERAND (fndecl
, 0);
3908 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
3909 masked_call_offset
= 1;
3911 if (fndecl
== NULL_TREE
)
3914 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3915 if (node
== NULL
|| node
->simd_clones
== NULL
)
3918 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3921 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3925 if (gimple_call_lhs (stmt
)
3926 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3929 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3931 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3933 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3936 /* Process function arguments. */
3937 nargs
= gimple_call_num_args (stmt
) - masked_call_offset
;
3939 /* Bail out if the function has zero arguments. */
3943 vec
<tree
>& simd_clone_info
= (slp_node
? SLP_TREE_SIMD_CLONE_INFO (slp_node
)
3944 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info
));
3945 arginfo
.reserve (nargs
, true);
3946 auto_vec
<slp_tree
> slp_op
;
3947 slp_op
.safe_grow_cleared (nargs
);
3949 for (i
= 0; i
< nargs
; i
++)
3951 simd_call_arg_info thisarginfo
;
3954 thisarginfo
.linear_step
= 0;
3955 thisarginfo
.align
= 0;
3956 thisarginfo
.op
= NULL_TREE
;
3957 thisarginfo
.simd_lane_linear
= false;
3959 int op_no
= i
+ masked_call_offset
;
3961 op_no
= vect_slp_child_index_for_operand (stmt
, op_no
, false);
3962 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3963 op_no
, &op
, &slp_op
[i
],
3964 &thisarginfo
.dt
, &thisarginfo
.vectype
)
3965 || thisarginfo
.dt
== vect_uninitialized_def
)
3967 if (dump_enabled_p ())
3968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3969 "use not simple.\n");
3973 if (thisarginfo
.dt
== vect_constant_def
3974 || thisarginfo
.dt
== vect_external_def
)
3976 /* With SLP we determine the vector type of constants/externals
3977 at analysis time, handling conflicts via
3978 vect_maybe_update_slp_op_vectype. At transform time
3979 we have a vector type recorded for SLP. */
3980 gcc_assert (!vec_stmt
3982 || thisarginfo
.vectype
!= NULL_TREE
);
3984 thisarginfo
.vectype
= get_vectype_for_scalar_type (vinfo
,
3989 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3991 /* For linear arguments, the analyze phase should have saved
3992 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3993 if (i
* 3 + 4 <= simd_clone_info
.length ()
3994 && simd_clone_info
[i
* 3 + 2])
3996 gcc_assert (vec_stmt
);
3997 thisarginfo
.linear_step
= tree_to_shwi (simd_clone_info
[i
* 3 + 2]);
3998 thisarginfo
.op
= simd_clone_info
[i
* 3 + 1];
3999 thisarginfo
.simd_lane_linear
4000 = (simd_clone_info
[i
* 3 + 3] == boolean_true_node
);
4001 /* If loop has been peeled for alignment, we need to adjust it. */
4002 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4003 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4004 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4006 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4007 tree step
= simd_clone_info
[i
* 3 + 2];
4008 tree opt
= TREE_TYPE (thisarginfo
.op
);
4009 bias
= fold_convert (TREE_TYPE (step
), bias
);
4010 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4012 = fold_build2 (POINTER_TYPE_P (opt
)
4013 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4014 thisarginfo
.op
, bias
);
4018 && thisarginfo
.dt
!= vect_constant_def
4019 && thisarginfo
.dt
!= vect_external_def
4021 && TREE_CODE (op
) == SSA_NAME
4022 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4024 && tree_fits_shwi_p (iv
.step
))
4026 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4027 thisarginfo
.op
= iv
.base
;
4029 else if ((thisarginfo
.dt
== vect_constant_def
4030 || thisarginfo
.dt
== vect_external_def
)
4031 && POINTER_TYPE_P (TREE_TYPE (op
)))
4032 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4033 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4035 if (POINTER_TYPE_P (TREE_TYPE (op
))
4036 && !thisarginfo
.linear_step
4038 && thisarginfo
.dt
!= vect_constant_def
4039 && thisarginfo
.dt
!= vect_external_def
4041 && TREE_CODE (op
) == SSA_NAME
)
4042 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4044 arginfo
.quick_push (thisarginfo
);
4047 poly_uint64 vf
= loop_vinfo
? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) : 1;
4048 unsigned group_size
= slp_node
? SLP_TREE_LANES (slp_node
) : 1;
4049 unsigned int badness
= 0;
4050 struct cgraph_node
*bestn
= NULL
;
4051 if (simd_clone_info
.exists ())
4052 bestn
= cgraph_node::get (simd_clone_info
[0]);
4054 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4055 n
= n
->simdclone
->next_clone
)
4057 unsigned int this_badness
= 0;
4058 unsigned int num_calls
;
4059 /* The number of arguments in the call and the number of parameters in
4060 the simdclone should match.  However, when the simdclone is
4061 'inbranch', it could have one more parameter than nargs when using
4062 an inbranch simdclone for a non-inbranch call, either in a
4063 non-masked loop using an all-true constant mask, or inside a masked
4064 loop using its mask.  */
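/* Illustrative sketch (not part of GCC): using an 'inbranch' clone (which
   takes a trailing mask parameter) for an unconditional call simply means
   passing an all-true mask.  The clone name and vector type below are
   hypothetical.  */

typedef int v4si __attribute__ ((vector_size (16)));

extern v4si foo_simd4_inbranch (v4si x, v4si mask);

static v4si
call_unconditionally (v4si x)
{
  v4si all_true = { -1, -1, -1, -1 };   /* every lane enabled */
  return foo_simd4_inbranch (x, all_true);
}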
4065 size_t simd_nargs
= n
->simdclone
->nargs
;
4066 if (!masked_call_offset
&& n
->simdclone
->inbranch
)
4068 if (!constant_multiple_p (vf
* group_size
, n
->simdclone
->simdlen
,
4070 || (!n
->simdclone
->inbranch
&& (masked_call_offset
> 0))
4071 || (nargs
!= simd_nargs
))
4074 this_badness
+= floor_log2 (num_calls
) * 4096;
4075 if (n
->simdclone
->inbranch
)
4076 this_badness
+= 8192;
4077 int target_badness
= targetm
.simd_clone
.usable (n
);
4078 if (target_badness
< 0)
4080 this_badness
+= target_badness
* 512;
4081 for (i
= 0; i
< nargs
; i
++)
4083 switch (n
->simdclone
->args
[i
].arg_type
)
4085 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4086 if (!useless_type_conversion_p
4087 (n
->simdclone
->args
[i
].orig_type
,
4088 TREE_TYPE (gimple_call_arg (stmt
,
4089 i
+ masked_call_offset
))))
4091 else if (arginfo
[i
].dt
== vect_constant_def
4092 || arginfo
[i
].dt
== vect_external_def
4093 || arginfo
[i
].linear_step
)
4096 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4097 if (arginfo
[i
].dt
!= vect_constant_def
4098 && arginfo
[i
].dt
!= vect_external_def
)
4101 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4103 if (arginfo
[i
].dt
== vect_constant_def
4104 || arginfo
[i
].dt
== vect_external_def
4105 || (arginfo
[i
].linear_step
4106 != n
->simdclone
->args
[i
].linear_step
))
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4112 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4113 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4114 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4118 case SIMD_CLONE_ARG_TYPE_MASK
:
4119 /* While we can create a traditional data vector from
4120 an incoming integer mode mask we have no good way to
4121 force generate an integer mode mask from a traditional
4122 boolean vector input. */
4123 if (SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4124 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4126 else if (!SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4127 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4128 this_badness
+= 2048;
4131 if (i
== (size_t) -1)
4133 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4138 if (arginfo
[i
].align
)
4139 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4140 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4142 if (i
== (size_t) -1)
4144 if (masked_call_offset
== 0
4145 && n
->simdclone
->inbranch
4146 && n
->simdclone
->nargs
> nargs
)
4148 gcc_assert (n
->simdclone
->args
[n
->simdclone
->nargs
- 1].arg_type
==
4149 SIMD_CLONE_ARG_TYPE_MASK
);
4150 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4151 not in a branch, as we'd have to construct an all-true mask. */
4152 if (!loop_vinfo
|| !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4155 if (bestn
== NULL
|| this_badness
< badness
)
4158 badness
= this_badness
;
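/* Illustrative sketch (not part of GCC): the shape of the score used to pick
   among candidate clones; lower is better.  The constants mirror the ones
   used above, but this helper itself is hypothetical and omits the
   alignment and mask-mode adjustments.  */

static unsigned int
clone_badness (unsigned int num_calls, int inbranch, int target_badness)
{
  unsigned int badness = 0;
  /* More calls per vector iteration (smaller simdlen) is worse.  */
  while (num_calls >>= 1)
    badness += 4096;                 /* ~ floor_log2 (num_calls) * 4096 */
  if (inbranch)
    badness += 8192;                 /* masked clone where no mask is needed */
  badness += target_badness * 512;   /* the target's own preference */
  return badness;
}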
4165 unsigned int num_mask_args
= 0;
4166 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4167 for (i
= 0; i
< nargs
; i
++)
4168 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4171 for (i
= 0; i
< nargs
; i
++)
4173 if ((arginfo
[i
].dt
== vect_constant_def
4174 || arginfo
[i
].dt
== vect_external_def
)
4175 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4177 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
,
4178 i
+ masked_call_offset
));
4179 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4181 if (arginfo
[i
].vectype
== NULL
4182 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4183 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4187 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4188 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4190 if (dump_enabled_p ())
4191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4192 "vector mask arguments are not supported.\n");
4196 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4198 tree clone_arg_vectype
= bestn
->simdclone
->args
[i
].vector_type
;
4199 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4201 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype
),
4202 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4204 /* FORNOW we only have partial support for vector-type masks
4205 that can't hold all of simdlen. */
4206 if (dump_enabled_p ())
4207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4209 "in-branch vector clones are not yet"
4210 " supported for mismatched vector sizes.\n");
4213 if (!expand_vec_cond_expr_p (clone_arg_vectype
,
4214 arginfo
[i
].vectype
, ERROR_MARK
))
4216 if (dump_enabled_p ())
4217 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4219 "cannot compute mask argument for"
4220 " in-branch vector clones.\n");
4224 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4226 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
))
4227 || maybe_ne (exact_div (bestn
->simdclone
->simdlen
,
4229 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4231 /* FORNOW we only have partial support for integer-type masks
4232 that represent the same number of lanes as the
4233 vectorized mask inputs. */
4234 if (dump_enabled_p ())
4235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4237 "in-branch vector clones are not yet "
4238 "supported for mismatched vector sizes.\n");
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4247 "in-branch vector clones not supported"
4248 " on this target.\n");
4254 fndecl
= bestn
->decl
;
4255 nunits
= bestn
->simdclone
->simdlen
;
4257 ncopies
= vector_unroll_factor (vf
* group_size
, nunits
);
4259 ncopies
= vector_unroll_factor (vf
, nunits
);
4261 /* If the function isn't const, only allow it in simd loops where user
4262 has asserted that at least nunits consecutive iterations can be
4263 performed using SIMD instructions. */
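/* Illustrative sketch (not part of GCC): a call that touches memory
   (gimple_vuse) is only vectorized here when the user has promised enough
   independent iterations, e.g. via the OpenMP 'safelen' clause.  The
   function and loop below are hypothetical.  */

extern int read_counter (void);        /* not 'const': reads memory */

static void
safelen_example (int *out, int n)
{
  /* loop->safelen is 8 here, so a clone with simdlen <= 8 may be used even
     though read_counter is not 'const'.  */
  #pragma omp simd safelen(8)
  for (int i = 0; i < n; i++)
    out[i] = read_counter ();
}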
4264 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4265 && gimple_vuse (stmt
))
4268 /* Sanity check: make sure that at least one copy of the vectorized stmt
4269 needs to be generated. */
4270 gcc_assert (ncopies
>= 1);
4272 if (!vec_stmt
) /* transformation not required. */
4275 for (unsigned i
= 0; i
< nargs
; ++i
)
4276 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], arginfo
[i
].vectype
))
4278 if (dump_enabled_p ())
4279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4280 "incompatible vector types for invariants\n");
4283 /* When the original call is pure or const but the SIMD ABI dictates
4284 an aggregate return we will have to use a virtual definition and
4285 in a loop eventually even need to add a virtual PHI.  That's
4286 not straightforward, so allow fixing this up via renaming.  */
4287 if (gimple_call_lhs (stmt
)
4288 && !gimple_vdef (stmt
)
4289 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4290 vinfo
->any_known_not_updated_vssa
= true;
4291 /* ??? For SLP code-gen we end up inserting after the last
4292 vector argument def rather than at the original call position
4293 so automagic virtual operand updating doesn't work. */
4294 if (gimple_vuse (stmt
) && slp_node
)
4295 vinfo
->any_known_not_updated_vssa
= true;
4296 simd_clone_info
.safe_push (bestn
->decl
);
4297 for (i
= 0; i
< bestn
->simdclone
->nargs
; i
++)
4299 switch (bestn
->simdclone
->args
[i
].arg_type
)
4303 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4306 simd_clone_info
.safe_grow_cleared (i
* 3 + 1, true);
4307 simd_clone_info
.safe_push (arginfo
[i
].op
);
4308 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4309 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4310 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4311 simd_clone_info
.safe_push (ls
);
4312 tree sll
= arginfo
[i
].simd_lane_linear
4313 ? boolean_true_node
: boolean_false_node
;
4314 simd_clone_info
.safe_push (sll
);
4317 case SIMD_CLONE_ARG_TYPE_MASK
:
4319 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4320 vect_record_loop_mask (loop_vinfo
,
4321 &LOOP_VINFO_MASKS (loop_vinfo
),
4322 ncopies
, vectype
, op
);
4328 if (!bestn
->simdclone
->inbranch
&& loop_vinfo
)
4330 if (dump_enabled_p ()
4331 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4332 dump_printf_loc (MSG_NOTE
, vect_location
,
4333 "can't use a fully-masked loop because a"
4334 " non-masked simd clone was selected.\n");
4335 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
4338 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4339 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4340 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4341 dt, slp_node, cost_vec); */
4347 if (dump_enabled_p ())
4348 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4351 scalar_dest
= gimple_call_lhs (stmt
);
4352 vec_dest
= NULL_TREE
;
4357 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4358 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4359 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4362 rtype
= TREE_TYPE (ratype
);
4366 auto_vec
<vec
<tree
> > vec_oprnds
;
4367 auto_vec
<unsigned> vec_oprnds_i
;
4368 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4371 vec_oprnds
.reserve_exact (nargs
);
4372 vect_get_slp_defs (vinfo
, slp_node
, &vec_oprnds
);
4375 vec_oprnds
.safe_grow_cleared (nargs
, true);
4376 for (j
= 0; j
< ncopies
; ++j
)
4378 poly_uint64 callee_nelements
;
4379 poly_uint64 caller_nelements
;
4380 /* Build argument list for the vectorized call. */
4382 vargs
.create (nargs
);
4386 for (i
= 0; i
< nargs
; i
++)
4388 unsigned int k
, l
, m
, o
;
4390 op
= gimple_call_arg (stmt
, i
+ masked_call_offset
);
4391 switch (bestn
->simdclone
->args
[i
].arg_type
)
4393 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4394 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4395 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4396 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4397 o
= vector_unroll_factor (nunits
, callee_nelements
);
4398 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4400 if (known_lt (callee_nelements
, caller_nelements
))
4402 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4403 if (!constant_multiple_p (caller_nelements
,
4404 callee_nelements
, &k
))
4407 gcc_assert ((k
& (k
- 1)) == 0);
4411 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4412 ncopies
* o
/ k
, op
,
4414 vec_oprnds_i
[i
] = 0;
4415 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4419 vec_oprnd0
= arginfo
[i
].op
;
4420 if ((m
& (k
- 1)) == 0)
4421 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4423 arginfo
[i
].op
= vec_oprnd0
;
4425 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4427 bitsize_int ((m
& (k
- 1)) * prec
));
4429 = gimple_build_assign (make_ssa_name (atype
),
4431 vect_finish_stmt_generation (vinfo
, stmt_info
,
4433 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4437 if (!constant_multiple_p (callee_nelements
,
4438 caller_nelements
, &k
))
4440 gcc_assert ((k
& (k
- 1)) == 0);
4441 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4443 vec_alloc (ctor_elts
, k
);
4446 for (l
= 0; l
< k
; l
++)
4448 if (m
== 0 && l
== 0)
4451 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4455 vec_oprnds_i
[i
] = 0;
4456 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4459 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4460 arginfo
[i
].op
= vec_oprnd0
;
4463 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4467 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4470 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, atype
,
4473 = gimple_build_assign (make_ssa_name (atype
),
4475 vect_finish_stmt_generation (vinfo
, stmt_info
,
4477 vargs
.safe_push (gimple_get_lhs (new_stmt
));
4480 vargs
.safe_push (vec_oprnd0
);
4483 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4485 = gimple_build_assign (make_ssa_name (atype
),
4487 vect_finish_stmt_generation (vinfo
, stmt_info
,
4489 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4494 case SIMD_CLONE_ARG_TYPE_MASK
:
4495 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4497 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4498 tree elt_type
= TREE_TYPE (atype
);
4499 tree one
= fold_convert (elt_type
, integer_one_node
);
4500 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4501 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4502 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4503 o
= vector_unroll_factor (nunits
, callee_nelements
);
4504 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4506 if (maybe_lt (callee_nelements
, caller_nelements
))
4508 /* The mask type has fewer elements than simdlen. */
4513 else if (known_eq (callee_nelements
, caller_nelements
))
4515 /* The SIMD clone function has the same number of
4516 elements as the current function. */
4520 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4524 vec_oprnds_i
[i
] = 0;
4526 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4528 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4530 vec_loop_masks
*loop_masks
4531 = &LOOP_VINFO_MASKS (loop_vinfo
);
4533 = vect_get_loop_mask (loop_vinfo
, gsi
,
4534 loop_masks
, ncopies
,
4537 = prepare_vec_mask (loop_vinfo
,
4538 TREE_TYPE (loop_mask
),
4539 loop_mask
, vec_oprnd0
,
4541 loop_vinfo
->vec_cond_masked_set
.add ({ vec_oprnd0
,
4546 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4547 build_vector_from_val (atype
, one
),
4548 build_vector_from_val (atype
, zero
));
4550 = gimple_build_assign (make_ssa_name (atype
),
4552 vect_finish_stmt_generation (vinfo
, stmt_info
,
4554 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4558 /* The mask type has more elements than simdlen. */
4565 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4567 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4568 /* Guess the number of lanes represented by atype. */
4569 poly_uint64 atype_subparts
4570 = exact_div (bestn
->simdclone
->simdlen
,
4572 o
= vector_unroll_factor (nunits
, atype_subparts
);
4573 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4578 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4582 vec_oprnds_i
[i
] = 0;
4584 if (maybe_lt (atype_subparts
,
4585 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4587 /* The mask argument has fewer elements than the
4592 else if (known_eq (atype_subparts
,
4593 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4595 /* The vector mask argument matches the input
4596 in the number of lanes, but not necessarily
4598 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4599 tree st
= lang_hooks
.types
.type_for_mode
4600 (TYPE_MODE (TREE_TYPE (vec_oprnd0
)), 1);
4601 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, st
,
4604 = gimple_build_assign (make_ssa_name (st
),
4606 vect_finish_stmt_generation (vinfo
, stmt_info
,
4608 if (!types_compatible_p (atype
, st
))
4611 = gimple_build_assign (make_ssa_name (atype
),
4615 vect_finish_stmt_generation (vinfo
, stmt_info
,
4618 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4622 /* The mask argument has more elements than the
4632 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4633 vargs
.safe_push (op
);
4635 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4636 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4641 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4642 &stmts
, true, NULL_TREE
);
4646 edge pe
= loop_preheader_edge (loop
);
4647 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4648 gcc_assert (!new_bb
);
4650 if (arginfo
[i
].simd_lane_linear
)
4652 vargs
.safe_push (arginfo
[i
].op
);
4655 tree phi_res
= copy_ssa_name (op
);
4656 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4657 add_phi_arg (new_phi
, arginfo
[i
].op
,
4658 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4660 = POINTER_TYPE_P (TREE_TYPE (op
))
4661 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4662 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4663 ? sizetype
: TREE_TYPE (op
);
4665 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4667 tree tcst
= wide_int_to_tree (type
, cst
);
4668 tree phi_arg
= copy_ssa_name (op
);
4670 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4671 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4672 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4673 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4675 arginfo
[i
].op
= phi_res
;
4676 vargs
.safe_push (phi_res
);
4681 = POINTER_TYPE_P (TREE_TYPE (op
))
4682 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4683 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4684 ? sizetype
: TREE_TYPE (op
);
4686 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4688 tree tcst
= wide_int_to_tree (type
, cst
);
4689 new_temp
= make_ssa_name (TREE_TYPE (op
));
4691 = gimple_build_assign (new_temp
, code
,
4692 arginfo
[i
].op
, tcst
);
4693 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4694 vargs
.safe_push (new_temp
);
4697 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4698 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4699 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4700 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4701 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4702 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4708 if (masked_call_offset
== 0
4709 && bestn
->simdclone
->inbranch
4710 && bestn
->simdclone
->nargs
> nargs
)
4713 size_t mask_i
= bestn
->simdclone
->nargs
- 1;
4715 gcc_assert (bestn
->simdclone
->args
[mask_i
].arg_type
==
4716 SIMD_CLONE_ARG_TYPE_MASK
);
4718 tree masktype
= bestn
->simdclone
->args
[mask_i
].vector_type
;
4719 callee_nelements
= TYPE_VECTOR_SUBPARTS (masktype
);
4720 o
= vector_unroll_factor (nunits
, callee_nelements
);
4721 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4723 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4725 vec_loop_masks
*loop_masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
4726 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
4727 ncopies
, vectype
, j
);
4730 mask
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
4733 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4735 /* This means we are dealing with integer mask modes.
4736 First convert to an integer type with the same size as
4737 the current vector type. */
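/* Illustrative sketch (not part of GCC): the two steps described above, in
   scalar terms.  An 8-lane boolean vector whose lanes are single bits
   occupies one byte; reinterpreting that byte as a same-sized integer is
   the VIEW_CONVERT_EXPR, and widening it to the clone's mask mode is the
   zero-extending NOP_EXPR.  The concrete widths are hypothetical.  */

#include <stdint.h>
#include <string.h>

static uint16_t
bool_vector_to_mask_mode (const unsigned char lane_bits[1])
{
  uint8_t same_size_int;
  memcpy (&same_size_int, lane_bits, sizeof same_size_int); /* VIEW_CONVERT_EXPR */
  return (uint16_t) same_size_int;                          /* zero-extend (NOP_EXPR) */
}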
4738 unsigned HOST_WIDE_INT intermediate_size
4739 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask
)));
4741 build_nonstandard_integer_type (intermediate_size
, 1);
4742 mask
= build1 (VIEW_CONVERT_EXPR
, mid_int_type
, mask
);
4744 = gimple_build_assign (make_ssa_name (mid_int_type
),
4746 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4747 /* Then zero-extend to the mask mode. */
4748 mask
= fold_build1 (NOP_EXPR
, masktype
,
4749 gimple_get_lhs (new_stmt
));
4751 else if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4753 tree one
= fold_convert (TREE_TYPE (masktype
),
4755 tree zero
= fold_convert (TREE_TYPE (masktype
),
4757 mask
= build3 (VEC_COND_EXPR
, masktype
, mask
,
4758 build_vector_from_val (masktype
, one
),
4759 build_vector_from_val (masktype
, zero
));
4764 new_stmt
= gimple_build_assign (make_ssa_name (masktype
), mask
);
4765 vect_finish_stmt_generation (vinfo
, stmt_info
,
4767 mask
= gimple_assign_lhs (new_stmt
);
4768 vargs
.safe_push (mask
);
4772 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4776 || known_eq (TYPE_VECTOR_SUBPARTS (rtype
), nunits
));
4778 new_temp
= create_tmp_var (ratype
);
4779 else if (useless_type_conversion_p (vectype
, rtype
))
4780 new_temp
= make_ssa_name (vec_dest
, new_call
);
4782 new_temp
= make_ssa_name (rtype
, new_call
);
4783 gimple_call_set_lhs (new_call
, new_temp
);
4785 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4786 gimple
*new_stmt
= new_call
;
4790 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
4793 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4794 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4795 k
= vector_unroll_factor (nunits
,
4796 TYPE_VECTOR_SUBPARTS (vectype
));
4797 gcc_assert ((k
& (k
- 1)) == 0);
4798 for (l
= 0; l
< k
; l
++)
4803 t
= build_fold_addr_expr (new_temp
);
4804 t
= build2 (MEM_REF
, vectype
, t
,
4805 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4808 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4809 bitsize_int (prec
), bitsize_int (l
* prec
));
4810 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4813 if (j
== 0 && l
== 0)
4814 *vec_stmt
= new_stmt
;
4816 SLP_TREE_VEC_DEFS (slp_node
)
4817 .quick_push (gimple_assign_lhs (new_stmt
));
4819 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4823 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4826 else if (!multiple_p (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
4829 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype
),
4830 TYPE_VECTOR_SUBPARTS (rtype
), &k
))
4832 gcc_assert ((k
& (k
- 1)) == 0);
4833 if ((j
& (k
- 1)) == 0)
4834 vec_alloc (ret_ctor_elts
, k
);
4838 o
= vector_unroll_factor (nunits
,
4839 TYPE_VECTOR_SUBPARTS (rtype
));
4840 for (m
= 0; m
< o
; m
++)
4842 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4843 size_int (m
), NULL_TREE
, NULL_TREE
);
4844 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4846 vect_finish_stmt_generation (vinfo
, stmt_info
,
4848 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4849 gimple_assign_lhs (new_stmt
));
4851 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4854 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4855 if ((j
& (k
- 1)) != k
- 1)
4857 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4859 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4860 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4862 if ((unsigned) j
== k
- 1)
4863 *vec_stmt
= new_stmt
;
4865 SLP_TREE_VEC_DEFS (slp_node
)
4866 .quick_push (gimple_assign_lhs (new_stmt
));
4868 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4873 tree t
= build_fold_addr_expr (new_temp
);
4874 t
= build2 (MEM_REF
, vectype
, t
,
4875 build_int_cst (TREE_TYPE (t
), 0));
4876 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4877 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4878 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4880 else if (!useless_type_conversion_p (vectype
, rtype
))
4882 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4884 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4885 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4890 *vec_stmt
= new_stmt
;
4892 SLP_TREE_VEC_DEFS (slp_node
).quick_push (gimple_get_lhs (new_stmt
));
4894 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4897 for (i
= 0; i
< nargs
; ++i
)
4899 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4904 /* Mark the clone as no longer being a candidate for GC. */
4905 bestn
->gc_candidate
= false;
4907 /* The call in STMT might prevent it from being removed in dce.
4908 We however cannot remove it here, due to the way the ssa name
4909 it defines is mapped to the new definition. So just replace
4910 rhs of the statement with something harmless. */
4918 type
= TREE_TYPE (scalar_dest
);
4919 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4920 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4923 new_stmt
= gimple_build_nop ();
4924 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4925 unlink_stmt_vdef (stmt
);
4931 /* Function vect_gen_widened_results_half
4933 Create a vector stmt whose code, type, number of arguments, and result
4934 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4935 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4936 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4937 needs to be created (DECL is a function-decl of a target-builtin).
4938 STMT_INFO is the original scalar stmt that we are vectorizing. */
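/* Illustrative sketch (not part of GCC): a widening operation produces two
   result vectors, one from the low half of the input lanes and one from the
   high half; this helper builds one of those halves.  Which lanes count as
   "low" depends on the target's endianness; the types here are
   hypothetical.  */

typedef short v8hi __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

static void
widen_in_two_halves (v8hi in, v4si *lo, v4si *hi)
{
  v4si l = { in[0], in[1], in[2], in[3] };   /* VEC_UNPACK_LO_EXPR analogue */
  v4si h = { in[4], in[5], in[6], in[7] };   /* VEC_UNPACK_HI_EXPR analogue */
  *lo = l;
  *hi = h;
}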
4941 vect_gen_widened_results_half (vec_info
*vinfo
, code_helper ch
,
4942 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4943 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4944 stmt_vec_info stmt_info
)
4949 /* Generate half of the widened result: */
4950 if (op_type
!= binary_op
)
4952 new_stmt
= vect_gimple_build (vec_dest
, ch
, vec_oprnd0
, vec_oprnd1
);
4953 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4954 gimple_set_lhs (new_stmt
, new_temp
);
4955 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4961 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4962 For multi-step conversions store the resulting vectors and call the function
4963 recursively.  When NARROW_SRC_P is true, there's still a conversion after
4964 narrowing, so don't store the vectors in the SLP_NODE or in the vector info
4965 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain).  */
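/* Illustrative sketch (not part of GCC): each demotion (packing) step pairs
   two input vectors into one narrower output, so every recursion level
   halves the number of operands.  The types and helpers are hypothetical.  */

typedef int   v4si __attribute__ ((vector_size (16)));
typedef short v8hi __attribute__ ((vector_size (16)));

static v8hi
pack_pair (v4si a, v4si b)     /* VEC_PACK_TRUNC_EXPR analogue */
{
  v8hi r = { (short) a[0], (short) a[1], (short) a[2], (short) a[3],
             (short) b[0], (short) b[1], (short) b[2], (short) b[3] };
  return r;
}

/* Four int vectors become two short vectors; a further level of a
   multi-step int -> char demotion would pack those two into one.  */
static void
one_demotion_level (v4si in[4], v8hi out[2])
{
  out[0] = pack_pair (in[0], in[1]);
  out[1] = pack_pair (in[2], in[3]);
}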
4968 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4970 stmt_vec_info stmt_info
,
4971 vec
<tree
> &vec_dsts
,
4972 gimple_stmt_iterator
*gsi
,
4973 slp_tree slp_node
, code_helper code
,
4977 tree vop0
, vop1
, new_tmp
, vec_dest
;
4979 vec_dest
= vec_dsts
.pop ();
4981 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4983 /* Create demotion operation. */
4984 vop0
= (*vec_oprnds
)[i
];
4985 vop1
= (*vec_oprnds
)[i
+ 1];
4986 gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
4987 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4988 gimple_set_lhs (new_stmt
, new_tmp
);
4989 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4990 if (multi_step_cvt
|| narrow_src_p
)
4991 /* Store the resulting vector for next recursive call,
4992 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4993 (*vec_oprnds
)[i
/2] = new_tmp
;
4996 /* This is the last step of the conversion sequence. Store the
4997 vectors in SLP_NODE or in vector info of the scalar statement
4998 (or in STMT_VINFO_RELATED_STMT chain). */
5000 slp_node
->push_vec_def (new_stmt
);
5002 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5006 /* For multi-step demotion operations we first generate demotion operations
5007 from the source type to the intermediate types, and then combine the
5008 results (stored in VEC_OPRNDS) in a demotion operation to the destination type.  */
5012 /* At each level of recursion we have half of the operands we had at the previous level.  */
5014 vec_oprnds->truncate ((i + 1) / 2);
5015 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
5017 stmt_info
, vec_dsts
, gsi
,
5018 slp_node
, VEC_PACK_TRUNC_EXPR
,
5022 vec_dsts
.quick_push (vec_dest
);
5026 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5027 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5028 STMT_INFO. For multi-step conversions store the resulting vectors and
5029 call the function recursively. */
5032 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
5033 vec
<tree
> *vec_oprnds0
,
5034 vec
<tree
> *vec_oprnds1
,
5035 stmt_vec_info stmt_info
, tree vec_dest
,
5036 gimple_stmt_iterator
*gsi
,
5038 code_helper ch2
, int op_type
)
5041 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
5042 gimple
*new_stmt1
, *new_stmt2
;
5043 vec
<tree
> vec_tmp
= vNULL
;
5045 vec_tmp
.create (vec_oprnds0
->length () * 2);
5046 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5048 if (op_type
== binary_op
)
5049 vop1
= (*vec_oprnds1
)[i
];
5053 /* Generate the two halves of promotion operation. */
5054 new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5055 op_type
, vec_dest
, gsi
,
5057 new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5058 op_type
, vec_dest
, gsi
,
5060 if (is_gimple_call (new_stmt1
))
5062 new_tmp1
= gimple_call_lhs (new_stmt1
);
5063 new_tmp2
= gimple_call_lhs (new_stmt2
);
5067 new_tmp1
= gimple_assign_lhs (new_stmt1
);
5068 new_tmp2
= gimple_assign_lhs (new_stmt2
);
5071 /* Store the results for the next step. */
5072 vec_tmp
.quick_push (new_tmp1
);
5073 vec_tmp
.quick_push (new_tmp2
);
5076 vec_oprnds0
->release ();
5077 *vec_oprnds0
= vec_tmp
;
5080 /* Create vectorized promotion stmts for widening stmts using only half the
5081 potential vector size for input. */
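/* Illustrative sketch (not part of GCC): "half widening" keeps the lane count
   of the output vector and simply converts both (narrow) inputs to the wider
   element type before the operation, instead of unpacking into hi/lo halves.
   The types below are hypothetical.  */

typedef short v4hi __attribute__ ((vector_size (8)));
typedef int   v4si __attribute__ ((vector_size (16)));

static v4si
half_widening_add (v4hi a, v4hi b)
{
  v4si wa = { a[0], a[1], a[2], a[3] };   /* NOP_EXPR widening of input 0 */
  v4si wb = { b[0], b[1], b[2], b[3] };   /* NOP_EXPR widening of input 1 */
  return wa + wb;                         /* operation at the wide type   */
}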
5083 vect_create_half_widening_stmts (vec_info
*vinfo
,
5084 vec
<tree
> *vec_oprnds0
,
5085 vec
<tree
> *vec_oprnds1
,
5086 stmt_vec_info stmt_info
, tree vec_dest
,
5087 gimple_stmt_iterator
*gsi
,
5096 vec
<tree
> vec_tmp
= vNULL
;
5098 vec_tmp
.create (vec_oprnds0
->length ());
5099 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5101 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
5103 gcc_assert (op_type
== binary_op
);
5104 vop1
= (*vec_oprnds1
)[i
];
5106 /* Widen the first vector input. */
5107 out_type
= TREE_TYPE (vec_dest
);
5108 new_tmp1
= make_ssa_name (out_type
);
5109 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5110 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
5111 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5113 /* Widen the second vector input. */
5114 new_tmp2
= make_ssa_name (out_type
);
5115 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5116 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5117 /* Perform the operation. With both vector inputs widened. */
5118 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5122 /* Perform the operation. With the single vector input widened. */
5123 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
5126 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5127 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5128 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5130 /* Store the results for the next step. */
5131 vec_tmp
.quick_push (new_tmp3
);
5134 vec_oprnds0
->release ();
5135 *vec_oprnds0
= vec_tmp
;
5139 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5140 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5141 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5142 Return true if STMT_INFO is vectorizable in this way. */
5145 vectorizable_conversion (vec_info
*vinfo
,
5146 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5147 gimple
**vec_stmt
, slp_tree slp_node
,
5148 stmt_vector_for_cost
*cost_vec
)
5150 tree vec_dest
, cvt_op
= NULL_TREE
;
5152 tree op0
, op1
= NULL_TREE
;
5153 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5155 code_helper code
, code1
, code2
;
5156 code_helper codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
5158 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5160 poly_uint64 nunits_in
;
5161 poly_uint64 nunits_out
;
5162 tree vectype_out
, vectype_in
;
5164 tree lhs_type
, rhs_type
;
5165 /* For conversions between floating point and integer, there are two NARROW
5166 cases.  NARROW_SRC is for FLOAT_EXPR, meaning
5167 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5168 This is safe when the range of the source integer can fit into the lower
5169 precision.  NARROW_DST is for FIX_TRUNC_EXPR, meaning
5170 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5171 For other conversions, when there's narrowing, NARROW_DST is used as the default.  */
5173 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
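/* Illustrative sketch (not part of GCC): the two narrowing flavours in scalar
   terms.  NARROW_SRC demotes before the FLOAT_EXPR (valid only when the value
   range fits the narrower integer); NARROW_DST demotes after the
   FIX_TRUNC_EXPR.  The widths chosen here are hypothetical.  */

#include <stdint.h>

static float
narrow_src_int64_to_float (int64_t x)   /* known to fit in int32_t */
{
  int32_t narrowed = (int32_t) x;       /* demotion first       */
  return (float) narrowed;              /* then FLOAT_EXPR      */
}

static int16_t
narrow_dst_double_to_int16 (double d)
{
  int32_t truncated = (int32_t) d;      /* FIX_TRUNC_EXPR first */
  return (int16_t) truncated;           /* then demotion        */
}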
5174 vec
<tree
> vec_oprnds0
= vNULL
;
5175 vec
<tree
> vec_oprnds1
= vNULL
;
5177 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5178 int multi_step_cvt
= 0;
5179 vec
<tree
> interm_types
= vNULL
;
5180 tree intermediate_type
, cvt_type
= NULL_TREE
;
5182 unsigned short fltsz
;
5184 /* Is STMT a vectorizable conversion? */
5186 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5189 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5193 gimple
* stmt
= stmt_info
->stmt
;
5194 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5197 if (gimple_get_lhs (stmt
) == NULL_TREE
5198 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5201 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5204 if (is_gimple_assign (stmt
))
5206 code
= gimple_assign_rhs_code (stmt
);
5207 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5209 else if (gimple_call_internal_p (stmt
))
5211 code
= gimple_call_internal_fn (stmt
);
5212 op_type
= gimple_call_num_args (stmt
);
5217 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5218 || code
== WIDEN_LSHIFT_EXPR
5219 || widening_fn_p (code
));
5222 && !CONVERT_EXPR_CODE_P (code
)
5223 && code
!= FIX_TRUNC_EXPR
5224 && code
!= FLOAT_EXPR
)
5227 /* Check types of lhs and rhs. */
5228 scalar_dest
= gimple_get_lhs (stmt
);
5229 lhs_type
= TREE_TYPE (scalar_dest
);
5230 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5232 /* Check the operands of the operation. */
5233 slp_tree slp_op0
, slp_op1
= NULL
;
5234 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5235 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5237 if (dump_enabled_p ())
5238 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5239 "use not simple.\n");
5243 rhs_type
= TREE_TYPE (op0
);
5244 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5245 && !((INTEGRAL_TYPE_P (lhs_type
)
5246 && INTEGRAL_TYPE_P (rhs_type
))
5247 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5248 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5251 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5252 && ((INTEGRAL_TYPE_P (lhs_type
)
5253 && !type_has_mode_precision_p (lhs_type
))
5254 || (INTEGRAL_TYPE_P (rhs_type
)
5255 && !type_has_mode_precision_p (rhs_type
))))
5257 if (dump_enabled_p ())
5258 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5259 "type conversion to/from bit-precision unsupported."
5264 if (op_type
== binary_op
)
5266 gcc_assert (code
== WIDEN_MULT_EXPR
5267 || code
== WIDEN_LSHIFT_EXPR
5268 || widening_fn_p (code
));
5270 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5271 gimple_call_arg (stmt
, 0);
5273 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5274 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5276 if (dump_enabled_p ())
5277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5278 "use not simple.\n");
5281 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5284 vectype_in
= vectype1_in
;
5287 /* If op0 is an external or constant def, infer the vector type
5288 from the scalar type. */
5290 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5292 gcc_assert (vectype_in
);
5295 if (dump_enabled_p ())
5296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5297 "no vectype for scalar type %T\n", rhs_type
);
5302 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5303 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5307 "can't convert between boolean and non "
5308 "boolean vectors %T\n", rhs_type
);
5313 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5314 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5315 if (known_eq (nunits_out
, nunits_in
))
5320 else if (multiple_p (nunits_out
, nunits_in
))
5321 modifier
= NARROW_DST
;
5324 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5328 /* Multiple types in SLP are handled by creating the appropriate number of
5329 vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in case of SLP.  */
5333 else if (modifier
== NARROW_DST
)
5334 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5336 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5338 /* Sanity check: make sure that at least one copy of the vectorized stmt
5339 needs to be generated. */
5340 gcc_assert (ncopies
>= 1);
5342 bool found_mode
= false;
5343 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5344 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5345 opt_scalar_mode rhs_mode_iter
;
5347 /* Supportable by target? */
5351 if (code
!= FIX_TRUNC_EXPR
5352 && code
!= FLOAT_EXPR
5353 && !CONVERT_EXPR_CODE_P (code
))
5355 gcc_assert (code
.is_tree_code ());
5356 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5363 /* For conversions between float and integer types try whether
5364 we can use intermediate signed integer types to support the conversion.  */
5366 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
5367 && (code
== FLOAT_EXPR
||
5368 (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
5370 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
5371 bool float_expr_p
= code
== FLOAT_EXPR
;
5372 unsigned short target_size
;
5373 scalar_mode intermediate_mode
;
5376 intermediate_mode
= lhs_mode
;
5377 target_size
= GET_MODE_SIZE (rhs_mode
);
5381 target_size
= GET_MODE_SIZE (lhs_mode
);
5382 if (!int_mode_for_size
5383 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
5386 code1
= float_expr_p
? code
: NOP_EXPR
;
5387 codecvt1
= float_expr_p
? NOP_EXPR
: code
;
5388 opt_scalar_mode mode_iter
;
5389 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
5391 intermediate_mode
= mode_iter
.require ();
5393 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
5396 scalar_mode cvt_mode
;
5397 if (!int_mode_for_size
5398 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
5401 cvt_type
= build_nonstandard_integer_type
5402 (GET_MODE_BITSIZE (cvt_mode
), 0);
5404 /* Check if the intermediate type can hold OP0's range.
5405 When converting from float to integer this is not necessary
5406 because values that do not fit the (smaller) target type are
5407 unspecified anyway. */
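/* Illustrative sketch (not part of GCC): why the range check matters for
   FLOAT_EXPR.  Demoting to the intermediate integer type is only safe when
   the known value range fits its precision; otherwise lanes would change
   value before the conversion to floating point.  The widths are
   hypothetical.  */

#include <stdint.h>

static float
convert_via_int32 (int64_t x)
{
  /* Safe only if range analysis proved x fits in 32 bits, i.e. the
     wi::min_precision checks above succeeded for the intermediate type.  */
  return (float) (int32_t) x;
}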
5408 if (demotion
&& float_expr_p
)
5410 wide_int op_min_value
, op_max_value
;
5411 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5414 if (cvt_type
== NULL_TREE
5415 || (wi::min_precision (op_max_value
, SIGNED
)
5416 > TYPE_PRECISION (cvt_type
))
5417 || (wi::min_precision (op_min_value
, SIGNED
)
5418 > TYPE_PRECISION (cvt_type
)))
5422 cvt_type
= get_vectype_for_scalar_type (vinfo
, cvt_type
, slp_node
);
5423 /* This should only happen for SLP as long as the loop vectorizer
5424 only supports same-sized vectors.  */
5425 if (cvt_type
== NULL_TREE
5426 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nunits_in
)
5427 || !supportable_convert_operation ((tree_code
) code1
,
5430 || !supportable_convert_operation ((tree_code
) codecvt1
,
5442 interm_types
.safe_push (cvt_type
);
5443 cvt_type
= NULL_TREE
;
5451 if (dump_enabled_p ())
5452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5453 "conversion not supported by target.\n");
5457 if (known_eq (nunits_in
, nunits_out
))
5459 if (!(code
.is_tree_code ()
5460 && supportable_half_widening_operation ((tree_code
) code
,
5461 vectype_out
, vectype_in
,
5465 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5468 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5469 vectype_out
, vectype_in
, &code1
,
5470 &code2
, &multi_step_cvt
,
5473 /* Binary widening operation can only be supported directly by the
5475 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5479 if (code
!= FLOAT_EXPR
5480 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5483 fltsz
= GET_MODE_SIZE (lhs_mode
);
5484 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5486 rhs_mode
= rhs_mode_iter
.require ();
5487 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5491 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5492 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5493 if (cvt_type
== NULL_TREE
)
5496 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5499 gcc_assert (code
.is_tree_code ());
5500 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5505 else if (!supportable_widening_operation (vinfo
, code
,
5506 stmt_info
, vectype_out
,
5507 cvt_type
, &codecvt1
,
5508 &codecvt2
, &multi_step_cvt
,
5512 gcc_assert (multi_step_cvt
== 0);
5514 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5517 &code2
, &multi_step_cvt
,
5528 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5529 codecvt2
= ERROR_MARK
;
5533 interm_types
.safe_push (cvt_type
);
5534 cvt_type
= NULL_TREE
;
5539 gcc_assert (op_type
== unary_op
);
5540 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5541 &code1
, &multi_step_cvt
,
5545 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5548 if (code
== FIX_TRUNC_EXPR
)
5551 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5552 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5553 if (cvt_type
== NULL_TREE
)
5555 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5560 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5561 &code1
, &multi_step_cvt
,
5565 /* If op0 can be represented with a low-precision integer,
5566 truncate it to cvt_type and then do FLOAT_EXPR.  */
5567 else if (code
== FLOAT_EXPR
)
5569 wide_int op_min_value
, op_max_value
;
5570 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5574 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5575 if (cvt_type
== NULL_TREE
5576 || (wi::min_precision (op_max_value
, SIGNED
)
5577 > TYPE_PRECISION (cvt_type
))
5578 || (wi::min_precision (op_min_value
, SIGNED
)
5579 > TYPE_PRECISION (cvt_type
)))
5582 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5583 if (cvt_type
== NULL_TREE
)
5585 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5586 &code1
, &multi_step_cvt
,
5589 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5593 modifier
= NARROW_SRC
;
5604 if (!vec_stmt
) /* transformation not required. */
5607 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5608 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5612 "incompatible vector types for invariants\n");
5615 DUMP_VECT_SCOPE ("vectorizable_conversion");
5616 if (modifier
== NONE
)
5618 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5619 vect_model_simple_cost (vinfo
, stmt_info
,
5620 ncopies
* (1 + multi_step_cvt
),
5621 dt
, ndts
, slp_node
, cost_vec
);
5623 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5625 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5626 /* The final packing step produces one vector result per copy. */
5627 unsigned int nvectors
5628 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5629 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5630 multi_step_cvt
, cost_vec
,
5635 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5636 /* The initial unpacking step produces two vector results
5637 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5638 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
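/* Illustrative sketch (not part of GCC): the vector-count computation the
   comment above describes, written out as a hypothetical helper.  */

static unsigned int
promotion_nvectors (unsigned int num_vec_stmts, unsigned int multi_step_cvt)
{
  /* MULTI_STEP_CVT is 0 for a single conversion, so shifting right by it
     divides by 2^(number of steps - 1).  */
  return num_vec_stmts >> multi_step_cvt;
}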
5639 unsigned int nvectors
5641 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5643 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5644 multi_step_cvt
, cost_vec
,
5647 interm_types
.release ();
5652 if (dump_enabled_p ())
5653 dump_printf_loc (MSG_NOTE
, vect_location
,
5654 "transform conversion. ncopies = %d.\n", ncopies
);
5656 if (op_type
== binary_op
)
5658 if (CONSTANT_CLASS_P (op0
))
5659 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5660 else if (CONSTANT_CLASS_P (op1
))
5661 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5664 /* In case of multi-step conversion, we first generate conversion operations
5665 to the intermediate types, and then from those types to the final one.
5666 We create vector destinations for the intermediate type (TYPES) received
5667 from supportable_*_operation, and store them in the correct order
5668 for future use in vect_create_vectorized_*_stmts (). */
5669 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5670 bool widen_or_narrow_float_p
5671 = cvt_type
&& (modifier
== WIDEN
|| modifier
== NARROW_SRC
);
5672 vec_dest
= vect_create_destination_var (scalar_dest
,
5673 widen_or_narrow_float_p
5674 ? cvt_type
: vectype_out
);
5675 vec_dsts
.quick_push (vec_dest
);
5679 for (i
= interm_types
.length () - 1;
5680 interm_types
.iterate (i
, &intermediate_type
); i
--)
5682 vec_dest
= vect_create_destination_var (scalar_dest
,
5684 vec_dsts
.quick_push (vec_dest
);
5689 vec_dest
= vect_create_destination_var (scalar_dest
,
5690 widen_or_narrow_float_p
5691 ? vectype_out
: cvt_type
);
5696 if (modifier
== WIDEN
)
5698 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5701 ninputs
= vect_pow2 (multi_step_cvt
);
5709 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5710 op0
, vectype_in
, &vec_oprnds0
);
5711 /* vec_dest is intermediate type operand when multi_step_cvt. */
5715 vec_dest
= vec_dsts
[0];
5718 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5720 /* Arguments are ready, create the new vector stmt. */
5724 gcc_assert (multi_step_cvt
== 1);
5725 new_stmt
= vect_gimple_build (cvt_op
, codecvt1
, vop0
);
5726 new_temp
= make_ssa_name (cvt_op
, new_stmt
);
5727 gimple_assign_set_lhs (new_stmt
, new_temp
);
5728 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5731 new_stmt
= vect_gimple_build (vec_dest
, code1
, vop0
);
5732 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5733 gimple_set_lhs (new_stmt
, new_temp
);
5734 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5737 slp_node
->push_vec_def (new_stmt
);
5739 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5744 /* In case the vectorization factor (VF) is bigger than the number
5745 of elements that we can fit in a vectype (nunits), we have to
5746 generate more than one vector stmt -- i.e., we need to "unroll"
5747 the vector stmt by a factor VF/nunits. */
5748 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5749 op0
, vectype_in
, &vec_oprnds0
,
5750 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5751 vectype_in
, &vec_oprnds1
);
5752 if (code
== WIDEN_LSHIFT_EXPR
)
5754 int oprnds_size
= vec_oprnds0
.length ();
5755 vec_oprnds1
.create (oprnds_size
);
5756 for (i
= 0; i
< oprnds_size
; ++i
)
5757 vec_oprnds1
.quick_push (op1
);
5759 /* Arguments are ready. Create the new vector stmts. */
5760 for (i
= multi_step_cvt
; i
>= 0; i
--)
5762 tree this_dest
= vec_dsts
[i
];
5763 code_helper c1
= code1
, c2
= code2
;
5764 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5769 if (known_eq (nunits_out
, nunits_in
))
5770 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
, &vec_oprnds1
,
5771 stmt_info
, this_dest
, gsi
, c1
,
5774 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5775 &vec_oprnds1
, stmt_info
,
5780 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5785 new_temp
= make_ssa_name (vec_dest
);
5786 new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5787 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5790 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5793 slp_node
->push_vec_def (new_stmt
);
5795 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5801 /* In case the vectorization factor (VF) is bigger than the number
5802 of elements that we can fit in a vectype (nunits), we have to
5803 generate more than one vector stmt -- i.e., we need to "unroll"
5804 the vector stmt by a factor VF/nunits. */
5805 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5806 op0
, vectype_in
, &vec_oprnds0
);
5807 /* Arguments are ready. Create the new vector stmts. */
5808 if (cvt_type
&& modifier
== NARROW_DST
)
5809 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5811 new_temp
= make_ssa_name (vec_dest
);
5812 gimple
*new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5813 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5814 vec_oprnds0
[i
] = new_temp
;
5817 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5819 stmt_info
, vec_dsts
, gsi
,
5821 modifier
== NARROW_SRC
);
5822 /* After demoting op0 to cvt_type, convert it to dest. */
5823 if (cvt_type
&& code
== FLOAT_EXPR
)
5825 for (unsigned int i
= 0; i
!= vec_oprnds0
.length() / 2; i
++)
5827 /* Arguments are ready, create the new vector stmt. */
5828 gcc_assert (TREE_CODE_LENGTH ((tree_code
) codecvt1
) == unary_op
);
5830 = vect_gimple_build (vec_dest
, codecvt1
, vec_oprnds0
[i
]);
5831 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5832 gimple_set_lhs (new_stmt
, new_temp
);
5833 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5835 /* This is the last step of the conversion sequence. Store the
5836 vectors in SLP_NODE or in vector info of the scalar statement
5837 (or in STMT_VINFO_RELATED_STMT chain). */
5839 slp_node
->push_vec_def (new_stmt
);
5841 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5847 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5849 vec_oprnds0
.release ();
5850 vec_oprnds1
.release ();
5851 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
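
/* As an illustrative example (not from the original sources) of a no-op
   conversion recognized above: a cast between integer types of the same
   precision, such as

       int i = ...;
       unsigned int u = (unsigned int) i;

   does not change the bit pattern, so neither the scalar nor the vector
   form needs any code beyond a plain copy.  */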
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  tree new_temp;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!(gimple_assign_single_p (stmt)
	|| code == PAREN_EXPR
	|| CONVERT_EXPR_CODE_P (code)))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  slp_tree slp_op;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			   &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (!vectype_in)
    vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't convert between boolean and non "
			 "boolean vectors %T\n", TREE_TYPE (op));
      return false;
    }

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
	  || (INTEGRAL_TYPE_P (TREE_TYPE (op))
	      && !type_has_mode_precision_p (TREE_TYPE (op))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && INTEGRAL_TYPE_P (TREE_TYPE (op))
	   && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
		 > TYPE_PRECISION (TREE_TYPE (op)))
		&& TYPE_UNSIGNED (TREE_TYPE (op)))
	       || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
		   == TYPE_PRECISION (TREE_TYPE (op))))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      if (!vect_nop_conversion_p (stmt_info))
	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
				cost_vec);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    {
      if (CONVERT_EXPR_CODE_P (code)
	  || code == VIEW_CONVERT_EXPR)
	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
      gassign *new_stmt = gimple_build_assign (vec_dest, vop);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds.release ();

  return true;
}
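
/* For illustration only (not part of the original sources), the kind of
   statement handled above is a plain SSA copy or no-op conversion inside
   a vectorizable loop, e.g.

       void f (int *a, unsigned int *b, int n)
       {
	 for (int i = 0; i < n; i++)
	   b[i] = (unsigned int) a[i];
       }

   The conversion changes neither the number of vector elements nor the
   vector size, so each vector statement is just a VIEW_CONVERT_EXPR copy.  */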
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
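
/* As an illustration (not from the original sources), both kinds of shift
   count appear in

       void f (int *a, int *b, int s, int n)
       {
	 for (int i = 0; i < n; i++)
	   {
	     a[i] = a[i] << s;
	     b[i] = b[i] >> (i & 7);
	   }
       }

   The first statement shifts by a loop-invariant scalar, the second by a
   per-element amount, and supportability depends on whether the target
   provides the corresponding vector-by-scalar or vector-by-vector shift
   patterns.  */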
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
6096 vectorizable_shift (vec_info
*vinfo
,
6097 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6098 gimple
**vec_stmt
, slp_tree slp_node
,
6099 stmt_vector_for_cost
*cost_vec
)
6103 tree op0
, op1
= NULL
;
6104 tree vec_oprnd1
= NULL_TREE
;
6106 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6107 enum tree_code code
;
6108 machine_mode vec_mode
;
6112 machine_mode optab_op2_mode
;
6113 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
6115 poly_uint64 nunits_in
;
6116 poly_uint64 nunits_out
;
6121 vec
<tree
> vec_oprnds0
= vNULL
;
6122 vec
<tree
> vec_oprnds1
= vNULL
;
6125 bool scalar_shift_arg
= true;
6126 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6127 bool incompatible_op1_vectype_p
= false;
6129 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6132 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6133 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
6137 /* Is STMT a vectorizable binary/unary operation? */
6138 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6142 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6145 code
= gimple_assign_rhs_code (stmt
);
6147 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6148 || code
== RROTATE_EXPR
))
6151 scalar_dest
= gimple_assign_lhs (stmt
);
6152 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6153 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6157 "bit-precision shifts not supported.\n");
6162 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6163 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6165 if (dump_enabled_p ())
6166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6167 "use not simple.\n");
6170 /* If op0 is an external or constant def, infer the vector type
6171 from the scalar type. */
6173 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
6175 gcc_assert (vectype
);
6178 if (dump_enabled_p ())
6179 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6180 "no vectype for scalar type\n");
6184 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6185 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6186 if (maybe_ne (nunits_out
, nunits_in
))
6189 stmt_vec_info op1_def_stmt_info
;
6191 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
6192 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6196 "use not simple.\n");
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
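
  /* As a numeric illustration (not from the original sources): with a
     vectorization factor of 16 and a vector type holding 8 elements,
     vect_get_num_copies returns 16 / 8 = 2, i.e. each scalar shift is
     replaced by two vector shift statements.  */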
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
6213 if ((dt
[1] == vect_internal_def
6214 || dt
[1] == vect_induction_def
6215 || dt
[1] == vect_nested_cycle
)
6217 scalar_shift_arg
= false;
6218 else if (dt
[1] == vect_constant_def
6219 || dt
[1] == vect_external_def
6220 || dt
[1] == vect_internal_def
)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
6227 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
6228 stmt_vec_info slpstmt_info
;
6230 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
6232 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
6233 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
6234 scalar_shift_arg
= false;
	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ??? For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
6242 if (dt
[1] == vect_internal_def
6243 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
6245 scalar_shift_arg
= false;
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
6251 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
6252 scalar_shift_arg
= false;
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6258 "operand mode requires invariant argument.\n");
6262 /* Vector shifted by vector. */
6263 bool was_scalar_shift_arg
= scalar_shift_arg
;
6264 if (!scalar_shift_arg
)
6266 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6267 if (dump_enabled_p ())
6268 dump_printf_loc (MSG_NOTE
, vect_location
,
6269 "vector/vector shift/rotate found.\n");
6272 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
6274 incompatible_op1_vectype_p
6275 = (op1_vectype
== NULL_TREE
6276 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
6277 TYPE_VECTOR_SUBPARTS (vectype
))
6278 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
6279 if (incompatible_op1_vectype_p
6281 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
6282 || slp_op1
->refcnt
!= 1))
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6286 "unusable type for last operand in"
6287 " vector/vector shift/rotate.\n");
      /* See if the machine has a vector shifted by scalar insn and if not
	 then see if it has a vector shifted by vector insn.  */
6295 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6297 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
6299 if (dump_enabled_p ())
6300 dump_printf_loc (MSG_NOTE
, vect_location
,
6301 "vector/scalar shift/rotate found.\n");
6305 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6307 && (optab_handler (optab
, TYPE_MODE (vectype
))
6308 != CODE_FOR_nothing
))
6310 scalar_shift_arg
= false;
6312 if (dump_enabled_p ())
6313 dump_printf_loc (MSG_NOTE
, vect_location
,
6314 "vector/vector shift/rotate found.\n");
6317 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
	  /* Unlike the other binary operators, shifts/rotates have
	     the rhs being int, instead of the same type as the lhs,
	     so make sure the scalar is the right type if we are
	     dealing with vectors of long long/long/short/char.  */
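	  /* For instance (illustrative only), in

		 long long *a;
		 int s;
		 a[i] = a[i] << s;

	     the shift amount S is an int while the shifted elements are
	     64 bits wide, so S must first be converted to the element
	     type before it can serve as a vector shift operand.  */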
6325 incompatible_op1_vectype_p
6327 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
6329 if (incompatible_op1_vectype_p
6330 && dt
[1] == vect_internal_def
)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "unusable type for last operand in"
6335 " vector/vector shift/rotate.\n");
6342 /* Supportable by target? */
6345 if (dump_enabled_p ())
6346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6350 vec_mode
= TYPE_MODE (vectype
);
6351 icode
= (int) optab_handler (optab
, vec_mode
);
6352 if (icode
== CODE_FOR_nothing
)
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6356 "op not supported by target.\n");
6359 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6360 if (vect_emulated_vector_p (vectype
))
6363 if (!vec_stmt
) /* transformation not required. */
6366 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6367 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
6368 && (!incompatible_op1_vectype_p
6369 || dt
[1] == vect_constant_def
)
6370 && !vect_maybe_update_slp_op_vectype
6372 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6374 if (dump_enabled_p ())
6375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6376 "incompatible vector types for invariants\n");
6379 /* Now adjust the constant shift amount in place. */
6381 && incompatible_op1_vectype_p
6382 && dt
[1] == vect_constant_def
)
6384 for (unsigned i
= 0;
6385 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6387 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6388 = fold_convert (TREE_TYPE (vectype
),
6389 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6390 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6394 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6395 DUMP_VECT_SCOPE ("vectorizable_shift");
6396 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6397 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6403 if (dump_enabled_p ())
6404 dump_printf_loc (MSG_NOTE
, vect_location
,
6405 "transform binary/unary operation.\n");
6407 if (incompatible_op1_vectype_p
&& !slp_node
)
6409 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6410 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6411 if (dt
[1] != vect_constant_def
)
6412 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6413 TREE_TYPE (vectype
), NULL
);
6417 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6419 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
      /* Vector shl and shr insn patterns can be defined with scalar
	 operand 2 (shift operand).  In this case, use constant or loop
	 invariant op1 directly, without extending it to vector mode
	 first.  */
6425 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6426 if (!VECTOR_MODE_P (optab_op2_mode
))
6428 if (dump_enabled_p ())
6429 dump_printf_loc (MSG_NOTE
, vect_location
,
6430 "operand 1 using scalar mode.\n");
6432 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6433 vec_oprnds1
.quick_push (vec_oprnd1
);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
6440 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6441 vec_oprnds1
.quick_push (vec_oprnd1
);
6444 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6446 if (was_scalar_shift_arg
)
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
6450 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6451 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6452 !loop_vinfo
? gsi
: NULL
);
6453 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6454 !loop_vinfo
? gsi
: NULL
);
6455 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6456 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6457 vec_oprnds1
.quick_push (vec_oprnd1
);
6459 else if (dt
[1] == vect_constant_def
)
6460 /* The constant shift amount has been adjusted in place. */
6463 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
6469 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6471 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6473 /* Arguments are ready. Create the new vector stmt. */
6474 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
6478 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6480 vop1
= vec_oprnds1
[0];
6481 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6483 = gimple_build_assign (new_temp
,
6484 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6486 TYPE_SIZE (TREE_TYPE (new_temp
)),
6487 bitsize_zero_node
));
6488 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6492 vop1
= vec_oprnds1
[i
];
6493 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6494 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6495 gimple_assign_set_lhs (new_stmt
, new_temp
);
6496 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6498 slp_node
->push_vec_def (new_stmt
);
6500 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6504 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6506 vec_oprnds0
.release ();
6507 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that
   can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
6521 vectorizable_operation (vec_info
*vinfo
,
6522 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6523 gimple
**vec_stmt
, slp_tree slp_node
,
6524 stmt_vector_for_cost
*cost_vec
)
6528 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6530 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6531 enum tree_code code
, orig_code
;
6532 machine_mode vec_mode
;
6536 bool target_support_p
;
6537 enum vect_def_type dt
[3]
6538 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6540 poly_uint64 nunits_in
;
6541 poly_uint64 nunits_out
;
6543 int ncopies
, vec_num
;
6545 vec
<tree
> vec_oprnds0
= vNULL
;
6546 vec
<tree
> vec_oprnds1
= vNULL
;
6547 vec
<tree
> vec_oprnds2
= vNULL
;
6548 tree vop0
, vop1
, vop2
;
6549 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6551 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6554 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6558 /* Is STMT a vectorizable binary/unary operation? */
6559 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6563 /* Loads and stores are handled in vectorizable_{load,store}. */
6564 if (STMT_VINFO_DATA_REF (stmt_info
))
6567 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6569 /* Shifts are handled in vectorizable_shift. */
6570 if (code
== LSHIFT_EXPR
6571 || code
== RSHIFT_EXPR
6572 || code
== LROTATE_EXPR
6573 || code
== RROTATE_EXPR
)
6576 /* Comparisons are handled in vectorizable_comparison. */
6577 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6580 /* Conditions are handled in vectorizable_condition. */
6581 if (code
== COND_EXPR
)
  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
6586 if (code
== POINTER_PLUS_EXPR
)
6588 if (code
== POINTER_DIFF_EXPR
)
6591 /* Support only unary or binary operations. */
6592 op_type
= TREE_CODE_LENGTH (code
);
6593 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6595 if (dump_enabled_p ())
6596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6597 "num. args = %d (not unary/binary/ternary op).\n",
6602 scalar_dest
= gimple_assign_lhs (stmt
);
6603 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
6607 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6609 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6610 /* Exception are bitwise binary operations. */
6611 && code
!= BIT_IOR_EXPR
6612 && code
!= BIT_XOR_EXPR
6613 && code
!= BIT_AND_EXPR
)
6615 if (dump_enabled_p ())
6616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6617 "bit-precision arithmetic not supported.\n");
6622 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6623 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6625 if (dump_enabled_p ())
6626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6627 "use not simple.\n");
6630 bool is_invariant
= (dt
[0] == vect_external_def
6631 || dt
[0] == vect_constant_def
);
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
6641 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6643 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6645 if (dump_enabled_p ())
6646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6647 "not supported operation on bool value.\n");
6650 vectype
= vectype_out
;
6653 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6657 gcc_assert (vectype
);
6660 if (dump_enabled_p ())
6661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6662 "no vectype for scalar type %T\n",
6668 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6669 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6670 if (maybe_ne (nunits_out
, nunits_in
))
6673 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6674 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6675 if (op_type
== binary_op
|| op_type
== ternary_op
)
6677 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6678 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6680 if (dump_enabled_p ())
6681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6682 "use not simple.\n");
6685 is_invariant
&= (dt
[1] == vect_external_def
6686 || dt
[1] == vect_constant_def
);
6688 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6691 if (op_type
== ternary_op
)
6693 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6694 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6696 if (dump_enabled_p ())
6697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6698 "use not simple.\n");
6701 is_invariant
&= (dt
[2] == vect_external_def
6702 || dt
[2] == vect_constant_def
);
6704 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    {
      ncopies = 1;
6714 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6718 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6722 gcc_assert (ncopies
>= 1);
  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
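  /* A small illustrative example (not from the original sources) of the
     rejected mixture:

	 _Bool *p;
	 int *a;
	 flag[i] = p[i] & (a[i] > 0);

     p[i] is a nonmask boolean loaded from memory, while (a[i] > 0) is a
     mask produced by a comparison, so the AND mixes the two kinds.  */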
6729 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6730 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6731 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6733 if (dump_enabled_p ())
6734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6735 "mixed mask and nonmask vector types\n");
6739 /* Supportable by target? */
6741 vec_mode
= TYPE_MODE (vectype
);
6742 if (code
== MULT_HIGHPART_EXPR
)
6743 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6746 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6749 if (dump_enabled_p ())
6750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6754 target_support_p
= (optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
6755 || optab_libfunc (optab
, vec_mode
));
6758 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6759 if (!target_support_p
|| using_emulated_vectors_p
)
6761 if (dump_enabled_p ())
6762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6763 "op not supported by target.\n");
      /* When vec_mode is not a vector mode and we verified ops we
	 do not have to lower like AND are natively supported let
	 those through even when the mode isn't word_mode.  For
	 ops we have to lower the lowering code assumes we are
	 dealing with word_mode.  */
6769 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype
))
6770 || (((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6771 || !target_support_p
)
6772 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6773 /* Check only during analysis. */
6774 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6776 if (dump_enabled_p ())
6777 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6780 if (dump_enabled_p ())
6781 dump_printf_loc (MSG_NOTE
, vect_location
,
6782 "proceeding using word mode.\n");
6783 using_emulated_vectors_p
= true;
6786 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6787 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6788 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
6789 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6790 internal_fn cond_len_fn
= get_conditional_len_internal_fn (code
);
  /* If operating on inactive elements could generate spurious traps,
     we need to restrict the operation to active lanes.  Note that this
     specifically doesn't apply to unhoisted invariants, since they
     operate on the same value for every lane.

     Similarly, if this operation is part of a reduction, a fully-masked
     loop should only change the active lanes of the reduction chain,
     keeping the inactive lanes as-is.  */
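  /* Illustrative example (not from the original sources): when

	 for (i = 0; i < n; i++)
	   q[i] = a[i] / b[i];

     is vectorized with partial vectors, the final iteration's inactive
     lanes must not execute the division, since b[] beyond n-1 may be zero
     or not even readable; the division is therefore emitted as a
     conditional internal function controlled by the loop mask or length.  */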
6800 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6803 if (!vec_stmt
) /* transformation not required. */
6806 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6807 && mask_out_inactive
)
6809 if (cond_len_fn
!= IFN_LAST
6810 && direct_internal_fn_supported_p (cond_len_fn
, vectype
,
6811 OPTIMIZE_FOR_SPEED
))
6812 vect_record_loop_len (loop_vinfo
, lens
, ncopies
* vec_num
, vectype
,
6814 else if (cond_fn
!= IFN_LAST
6815 && direct_internal_fn_supported_p (cond_fn
, vectype
,
6816 OPTIMIZE_FOR_SPEED
))
6817 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6821 if (dump_enabled_p ())
6822 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6823 "can't use a fully-masked loop because no"
6824 " conditional operation is available.\n");
6825 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6829 /* Put types on constant and invariant SLP children. */
6831 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6832 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6833 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6835 if (dump_enabled_p ())
6836 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6837 "incompatible vector types for invariants\n");
6841 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6842 DUMP_VECT_SCOPE ("vectorizable_operation");
6843 vect_model_simple_cost (vinfo
, stmt_info
,
6844 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6845 if (using_emulated_vectors_p
)
	  /* The above vect_model_simple_cost call handles constants
	     in the prologue and (mis-)costs one of the stmts as
	     vector stmt.  See below for the actual lowering that will
	     be applied.  */
6852 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
	  /* Bit operations do not have extra cost and are accounted
	     as vector stmt by vect_model_simple_cost.  */
6872 /* We also need to materialize two large constants. */
6873 record_stmt_cost (cost_vec
, 2, scalar_stmt
, stmt_info
,
6875 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
,
6884 if (dump_enabled_p ())
6885 dump_printf_loc (MSG_NOTE
, vect_location
,
6886 "transform binary/unary operation.\n");
6888 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6889 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
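  /* For example (illustrative only), for

	 ptrdiff_t d = p - q;

     the two pointer operands become vectors of unsigned elements, the
     subtraction is carried out in that unsigned vector type, and the
     result is then VIEW_CONVERT_EXPRed to the signed result vector type.  */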
6895 tree vec_cvt_dest
= NULL_TREE
;
6896 if (orig_code
== POINTER_DIFF_EXPR
)
6898 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6899 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6903 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6905 /* In case the vectorization factor (VF) is bigger than the number
6906 of elements that we can fit in a vectype (nunits), we have to generate
6907 more than one vector stmt - i.e - we need to "unroll" the
6908 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6909 from one copy of the vector stmt to the next, in the field
6910 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6911 stages to find the correct vector defs to be used when vectorizing
6912 stmts that use the defs of the current stmt. The example below
6913 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6914 we need to create 4 vectorized stmts):
6916 before vectorization:
6917 RELATED_STMT VEC_STMT
6921 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6923 RELATED_STMT VEC_STMT
6924 VS1_0: vx0 = memref0 VS1_1 -
6925 VS1_1: vx1 = memref1 VS1_2 -
6926 VS1_2: vx2 = memref2 VS1_3 -
6927 VS1_3: vx3 = memref3 - -
6928 S1: x = load - VS1_0
6931 step2: vectorize stmt S2 (done here):
6932 To vectorize stmt S2 we first need to find the relevant vector
6933 def for the first operand 'x'. This is, as usual, obtained from
6934 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6935 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6936 relevant vector def 'vx0'. Having found 'vx0' we can generate
6937 the vector stmt VS2_0, and as usual, record it in the
6938 STMT_VINFO_VEC_STMT of stmt S2.
6939 When creating the second copy (VS2_1), we obtain the relevant vector
6940 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6941 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6942 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6943 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6944 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6945 chain of stmts and pointers:
6946 RELATED_STMT VEC_STMT
6947 VS1_0: vx0 = memref0 VS1_1 -
6948 VS1_1: vx1 = memref1 VS1_2 -
6949 VS1_2: vx2 = memref2 VS1_3 -
6950 VS1_3: vx3 = memref3 - -
6951 S1: x = load - VS1_0
6952 VS2_0: vz0 = vx0 + v1 VS2_1 -
6953 VS2_1: vz1 = vx1 + v1 VS2_2 -
6954 VS2_2: vz2 = vx2 + v1 VS2_3 -
6955 VS2_3: vz3 = vx3 + v1 - -
6956 S2: z = x + 1 - VS2_0 */
6958 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6959 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6960 /* Arguments are ready. Create the new vector stmt. */
6961 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6963 gimple
*new_stmt
= NULL
;
6964 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6965 ? vec_oprnds1
[i
] : NULL_TREE
);
6966 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6967 if (using_emulated_vectors_p
6968 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
6970 /* Lower the operation. This follows vector lowering. */
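	  /* Sketch of the trick used below (illustration only): to add two
	     vectors packed into one general-purpose word without carries
	     leaking between elements, clear the top bit of every element,
	     add the masked words, and then patch the top bits back in from
	     the XOR of the operands' sign bits.  For 8-bit elements in a
	     32-bit word:

		 low  = 0x7f7f7f7f;  high = 0x80808080;
		 r    = ((a & low) + (b & low)) ^ ((a ^ b) & high);  */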
6971 unsigned int width
= vector_element_bits (vectype
);
6972 tree inner_type
= TREE_TYPE (vectype
);
6974 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
6975 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
6976 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
6978 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
6979 tree wvop0
= make_ssa_name (word_type
);
6980 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
6981 build1 (VIEW_CONVERT_EXPR
,
6983 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6984 tree result_low
, signs
;
6985 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
6987 tree wvop1
= make_ssa_name (word_type
);
6988 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
6989 build1 (VIEW_CONVERT_EXPR
,
6991 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6992 signs
= make_ssa_name (word_type
);
6993 new_stmt
= gimple_build_assign (signs
,
6994 BIT_XOR_EXPR
, wvop0
, wvop1
);
6995 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6996 tree b_low
= make_ssa_name (word_type
);
6997 new_stmt
= gimple_build_assign (b_low
,
6998 BIT_AND_EXPR
, wvop1
, low_bits
);
6999 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7000 tree a_low
= make_ssa_name (word_type
);
7001 if (code
== PLUS_EXPR
)
7002 new_stmt
= gimple_build_assign (a_low
,
7003 BIT_AND_EXPR
, wvop0
, low_bits
);
7005 new_stmt
= gimple_build_assign (a_low
,
7006 BIT_IOR_EXPR
, wvop0
, high_bits
);
7007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7008 if (code
== MINUS_EXPR
)
7010 new_stmt
= gimple_build_assign (NULL_TREE
,
7011 BIT_NOT_EXPR
, signs
);
7012 signs
= make_ssa_name (word_type
);
7013 gimple_assign_set_lhs (new_stmt
, signs
);
7014 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7016 new_stmt
= gimple_build_assign (NULL_TREE
,
7017 BIT_AND_EXPR
, signs
, high_bits
);
7018 signs
= make_ssa_name (word_type
);
7019 gimple_assign_set_lhs (new_stmt
, signs
);
7020 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7021 result_low
= make_ssa_name (word_type
);
7022 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
7023 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7027 tree a_low
= make_ssa_name (word_type
);
7028 new_stmt
= gimple_build_assign (a_low
,
7029 BIT_AND_EXPR
, wvop0
, low_bits
);
7030 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7031 signs
= make_ssa_name (word_type
);
7032 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
7033 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7034 new_stmt
= gimple_build_assign (NULL_TREE
,
7035 BIT_AND_EXPR
, signs
, high_bits
);
7036 signs
= make_ssa_name (word_type
);
7037 gimple_assign_set_lhs (new_stmt
, signs
);
7038 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7039 result_low
= make_ssa_name (word_type
);
7040 new_stmt
= gimple_build_assign (result_low
,
7041 MINUS_EXPR
, high_bits
, a_low
);
7042 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7044 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
7046 result_low
= make_ssa_name (word_type
);
7047 gimple_assign_set_lhs (new_stmt
, result_low
);
7048 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7049 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
7050 build1 (VIEW_CONVERT_EXPR
,
7051 vectype
, result_low
));
7052 new_temp
= make_ssa_name (vectype
);
7053 gimple_assign_set_lhs (new_stmt
, new_temp
);
7054 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7056 else if ((masked_loop_p
|| len_loop_p
) && mask_out_inactive
)
7060 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7061 vec_num
* ncopies
, vectype
, i
);
7064 mask
= build_minus_one_cst (truth_type_for (vectype
));
7065 auto_vec
<tree
> vops (6);
7066 vops
.quick_push (mask
);
7067 vops
.quick_push (vop0
);
7069 vops
.quick_push (vop1
);
7071 vops
.quick_push (vop2
);
7074 /* Perform the operation on active elements only and take
7075 inactive elements from the reduction chain input. */
7077 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
7081 auto else_value
= targetm
.preferred_else_value
7082 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
7083 vops
.quick_push (else_value
);
7087 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
7088 vec_num
* ncopies
, vectype
, i
, 1);
7090 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
7091 tree bias
= build_int_cst (intQI_type_node
, biasval
);
7092 vops
.quick_push (len
);
7093 vops
.quick_push (bias
);
7096 = gimple_build_call_internal_vec (masked_loop_p
? cond_fn
7099 new_temp
= make_ssa_name (vec_dest
, call
);
7100 gimple_call_set_lhs (call
, new_temp
);
7101 gimple_call_set_nothrow (call
, true);
7102 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7107 tree mask
= NULL_TREE
;
7108 /* When combining two masks check if either of them is elsewhere
7109 combined with a loop mask, if that's the case we can mark that the
7110 new combined mask doesn't need to be combined with a loop mask. */
7112 && code
== BIT_AND_EXPR
7113 && VECTOR_BOOLEAN_TYPE_P (vectype
))
7115 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
7118 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7119 vec_num
* ncopies
, vectype
, i
);
7121 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7125 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
7128 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7129 vec_num
* ncopies
, vectype
, i
);
7131 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7136 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
7137 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7138 gimple_assign_set_lhs (new_stmt
, new_temp
);
7139 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7140 if (using_emulated_vectors_p
)
7141 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
7143 /* Enter the combined value into the vector cond hash so we don't
7144 AND it with a loop mask again. */
7146 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
7151 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
7152 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
7154 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
7155 gimple_assign_set_lhs (new_stmt
, new_temp
);
7156 vect_finish_stmt_generation (vinfo
, stmt_info
,
7161 slp_node
->push_vec_def (new_stmt
);
7163 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7167 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7169 vec_oprnds0
.release ();
7170 vec_oprnds1
.release ();
7171 vec_oprnds2
.release ();
  return true;
}


/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to
	= DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */
7243 scan_operand_equal_p (tree ref1
, tree ref2
)
7245 tree ref
[2] = { ref1
, ref2
};
7246 poly_int64 bitsize
[2], bitpos
[2];
7247 tree offset
[2], base
[2];
7248 for (int i
= 0; i
< 2; ++i
)
7251 int unsignedp
, reversep
, volatilep
= 0;
7252 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
7253 &offset
[i
], &mode
, &unsignedp
,
7254 &reversep
, &volatilep
);
7255 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
7257 if (TREE_CODE (base
[i
]) == MEM_REF
7258 && offset
[i
] == NULL_TREE
7259 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
7261 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
7262 if (is_gimple_assign (def_stmt
)
7263 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
7264 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
7265 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
7267 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
7269 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
7270 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
7275 if (!operand_equal_p (base
[0], base
[1], 0))
7277 if (maybe_ne (bitsize
[0], bitsize
[1]))
7279 if (offset
[0] != offset
[1])
7281 if (!offset
[0] || !offset
[1])
7283 if (!operand_equal_p (offset
[0], offset
[1], 0))
7286 for (int i
= 0; i
< 2; ++i
)
7288 step
[i
] = integer_one_node
;
7289 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7291 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7292 if (is_gimple_assign (def_stmt
)
7293 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
7294 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
7297 step
[i
] = gimple_assign_rhs2 (def_stmt
);
7298 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
7301 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
7303 step
[i
] = TREE_OPERAND (offset
[i
], 1);
7304 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
7306 tree rhs1
= NULL_TREE
;
7307 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7309 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7310 if (gimple_assign_cast_p (def_stmt
))
7311 rhs1
= gimple_assign_rhs1 (def_stmt
);
7313 else if (CONVERT_EXPR_P (offset
[i
]))
7314 rhs1
= TREE_OPERAND (offset
[i
], 0);
7316 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
7317 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
7318 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
7319 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
7322 if (!operand_equal_p (offset
[0], offset
[1], 0)
7323 || !operand_equal_p (step
[0], step
[1], 0))
    return false;
  return true;
}


enum scan_store_kind
{
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */
static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  int i;
  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
	{
	  if (i == units_log2)
	    return -1;

	  if (whole_vector_shift_kind == scan_store_kind_perm)
	    {
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
		return -1;
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		{
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		    return -1;
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
		}
	    }
	  kind = whole_vector_shift_kind;
	}
      if (use_whole_vector)
	{
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
	}
    }

  return units_log2;
}
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
7428 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7429 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7432 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7435 || memory_access_type
!= VMAT_CONTIGUOUS
7436 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7437 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7438 || loop_vinfo
== NULL
7439 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7440 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7441 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7442 || !integer_zerop (DR_INIT (dr_info
->dr
))
7443 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7444 || !alias_sets_conflict_p (get_alias_set (vectype
),
7445 get_alias_set (TREE_TYPE (ref_type
))))
7447 if (dump_enabled_p ())
7448 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7449 "unsupported OpenMP scan store.\n");
7453 /* We need to pattern match code built by OpenMP lowering and simplified
7454 by following optimizations into something we can handle.
7455 #pragma omp simd reduction(inscan,+:r)
7459 #pragma omp scan inclusive (r)
7462 shall have body with:
7463 // Initialization for input phase, store the reduction initializer:
7464 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7465 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7467 // Actual input phase:
7469 r.0_5 = D.2042[_20];
7472 // Initialization for scan phase:
7473 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7479 // Actual scan phase:
7481 r.1_8 = D.2042[_20];
7483 The "omp simd array" variable D.2042 holds the privatized copy used
7484 inside of the loop and D.2043 is another one that holds copies of
7485 the current original list item. The separate GOMP_SIMD_LANE ifn
7486 kinds are there in order to allow optimizing the initializer store
7487 and combiner sequence, e.g. if it is originally some C++ish user
7488 defined reduction, but allow the vectorizer to pattern recognize it
7489 and turn into the appropriate vectorized scan.
7491 For exclusive scan, this is slightly different:
7492 #pragma omp simd reduction(inscan,+:r)
7496 #pragma omp scan exclusive (r)
7499 shall have body with:
7500 // Initialization for input phase, store the reduction initializer:
7501 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7502 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7504 // Actual input phase:
7506 r.0_5 = D.2042[_20];
7509 // Initialization for scan phase:
7510 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7516 // Actual scan phase:
7518 r.1_8 = D.2044[_20];
7521 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7523 /* Match the D.2042[_21] = 0; store above. Just require that
7524 it is a constant or external definition store. */
7525 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7528 if (dump_enabled_p ())
7529 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7530 "unsupported OpenMP scan initializer store.\n");
7534 if (! loop_vinfo
->scan_map
)
7535 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7536 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7537 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7540 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7542 /* These stores can be vectorized normally. */
7546 if (rhs_dt
!= vect_internal_def
)
7549 if (dump_enabled_p ())
7550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7551 "unsupported OpenMP scan combiner pattern.\n");
7555 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7556 tree rhs
= gimple_assign_rhs1 (stmt
);
7557 if (TREE_CODE (rhs
) != SSA_NAME
)
7560 gimple
*other_store_stmt
= NULL
;
7561 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7562 bool inscan_var_store
7563 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7565 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7567 if (!inscan_var_store
)
7569 use_operand_p use_p
;
7570 imm_use_iterator iter
;
7571 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7573 gimple
*use_stmt
= USE_STMT (use_p
);
7574 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7576 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7577 || !is_gimple_assign (use_stmt
)
7578 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7580 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7582 other_store_stmt
= use_stmt
;
7584 if (other_store_stmt
== NULL
)
7586 rhs
= gimple_assign_lhs (other_store_stmt
);
7587 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7591 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7593 use_operand_p use_p
;
7594 imm_use_iterator iter
;
7595 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7597 gimple
*use_stmt
= USE_STMT (use_p
);
7598 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7600 if (other_store_stmt
)
7602 other_store_stmt
	    = use_stmt;

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  if (gimple_bb (def_stmt) != gimple_bb (stmt)
      || !is_gimple_assign (def_stmt)
      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
    return false;

  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
  switch (code)
    {
    case POINTER_PLUS_EXPR:
      code = PLUS_EXPR;
      break;
    case MULT_HIGHPART_EXPR:
      return false;
    default:
      break;
    }
  if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
    return false;

  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
    return false;

  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load1_stmt)
      || gimple_bb (load2_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load2_stmt))
    return false;

  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  if (load1_stmt_info == NULL
      || load2_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    return false;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
    {
      dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
	return false;
      tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	return false;
    }

  use_operand_p use_p;
  imm_use_iterator iter;
  FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
	continue;
      if (other_store_stmt)
	return false;
      other_store_stmt = use_stmt;
    }

  if (other_store_stmt == NULL)
    return false;
  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
      || !gimple_store_p (other_store_stmt))
    return false;

  stmt_vec_info other_store_stmt_info
    = loop_vinfo->lookup_stmt (other_store_stmt);
  if (other_store_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    return false;

  gimple *stmt1 = stmt;
  gimple *stmt2 = other_store_stmt;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    std::swap (stmt1, stmt2);
  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
			    gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
			     gimple_assign_rhs1 (load1_stmt)))
    return false;

  tree var3 = NULL_TREE;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
				gimple_assign_rhs1 (load2_stmt)))
    return false;
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
	return false;
      var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan exclusive",
			       DECL_ATTRIBUTES (var3)))
	return false;
    }

  dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
  if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
    return false;

  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
    return false;

  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    std::swap (var1, var2);

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      if (!lookup_attribute ("omp simd inscan exclusive",
			     DECL_ATTRIBUTES (var1)))
	return false;
    }

  if (loop_vinfo->scan_map == NULL)
    return false;
  tree *init = loop_vinfo->scan_map->get (var1);
  if (init == NULL)
    return false;

  /* The IL is as expected, now check if we can actually vectorize it.
     ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     ...
     // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
     _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     ...
     // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //	       _31[1]+.._31[4], ... _31[4]+.._31[7] };
     _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     ...
     // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //	       _31[0]+.._31[4], ... _31[0]+.._31[7] };
     ...
     _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2043] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _38;
     ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     ...
     // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
     //	       _31[3]+_31[4], ... _31[5]+.._31[6] };
     _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     ...
     // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //	       _31[1]+.._31[4], ... _31[3]+.._31[6] };
     _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     ...
     // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //	       _31[0]+.._31[4], ... _31[0]+.._31[6] };
     ...
     _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2044] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _51;  */
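  /* A minimal scalar sketch of the same idea (illustrative only, not taken
     from the original sources), assuming an inclusive scan over 8 int
     elements: each of the log2(8) = 3 steps below corresponds to one
     VEC_PERM_EXPR (shifting the vector up by STEP lanes and filling the
     freed lanes from the init value) followed by one vector addition in the
     sequences shown above:

	for (int step = 1; step < 8; step *= 2)
	  for (int k = 7; k >= step; k--)
	    a[k] += a[k - step];

     After the three steps a[k] holds the inclusive prefix sum of
     a[0] .. a[k].  */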
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    return false;

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
    return false;

  return true;
}
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    {
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  rhs = gimple_assign_lhs (use_stmt);
	  break;
	}
    }

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
	      && commutative_tree_code (code));
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
  dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);

  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    {
      std::swap (rhs1, rhs2);
      std::swap (var1, var2);
      std::swap (load1_dr_info, load2_dr_info);
    }

  tree *init = loop_vinfo->scan_map->get (var1);

  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    gcc_unreachable ();
  auto_vec<enum scan_store_kind, 16> use_whole_vector;
  int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
  gcc_assert (units_log2 > 0);
  auto_vec<tree, 16> perms;
  perms.quick_grow (units_log2 + 1);
  tree zero_vec = NULL_TREE, masktype = NULL_TREE;
  for (int i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	for (j = 0; j < nunits; ++j)
	  sel[j] = nunits - 1;
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!use_whole_vector.is_empty ()
	  && use_whole_vector[i] != scan_store_kind_perm)
	{
	  if (zero_vec == NULL_TREE)
	    zero_vec = build_zero_cst (vectype);
	  if (masktype == NULL_TREE
	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
	    masktype = truth_type_for (vectype);
	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
	}
      else
	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
    }

  tree vec_oprnd1 = NULL_TREE;
  tree vec_oprnd2 = NULL_TREE;
  tree vec_oprnd3 = NULL_TREE;
  tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
  tree dataref_offset = build_int_cst (ref_type, 0);
  tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
					   vectype, VMAT_CONTIGUOUS);
  tree ldataref_ptr = NULL_TREE;
  tree orig = NULL_TREE;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vec_oprnds2;
  auto_vec<tree> vec_oprnds3;
  vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
		     *init, &vec_oprnds1,
		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
		     rhs2, &vec_oprnds3);
  for (int j = 0; j < ncopies; j++)
    {
      vec_oprnd1 = vec_oprnds1[j];
      if (ldataref_ptr == NULL)
	vec_oprnd2 = vec_oprnds2[j];
      vec_oprnd3 = vec_oprnds3[j];
      if (j == 0)
	orig = vec_oprnd3;
      else if (!inscan_var_store)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

      if (ldataref_ptr)
	{
	  vec_oprnd2 = make_ssa_name (vectype);
	  tree data_ref = fold_build2 (MEM_REF, vectype,
				       unshare_expr (ldataref_ptr),
				       dataref_offset);
	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
	}

      tree v = vec_oprnd2;
      for (int i = 0; i < units_log2; ++i)
	{
	  tree new_temp = make_ssa_name (vectype);
	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
					   (zero_vec
					    && (use_whole_vector[i]
						!= scan_store_kind_perm))
					   ? zero_vec : vec_oprnd1, v,
					   perms[i]);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
	    {
	      /* Whole vector shift shifted in zero bits, but if *init
		 is not initializer_zerop, we need to replace those elements
		 with elements from vec_oprnd1.  */
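	      /* Illustrative example (not from the original sources): with
		 nunits == 8 and i == 1 the VEC_COND_EXPR mask built below is
		 { false, false, true, true, true, true, true, true }, i.e.
		 the 1 << i low lanes that the shift filled with zeros are
		 taken from vec_oprnd1 (the vectorized *init value) instead
		 of from the shifted vector.  */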
	      tree_vector_builder vb (masktype, nunits, 1);
	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
			       ? boolean_false_node : boolean_true_node);

	      tree new_temp2 = make_ssa_name (vectype);
	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
				       new_temp, vec_oprnd1);
	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	      new_temp = new_temp2;
	    }

	  /* For exclusive scan, perform the perms[i] permutation once
	     more.  */
	  if (i == 0
	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
	      && v == vec_oprnd2)
	    {
	      v = new_temp;
	      --i;
	      continue;
	    }

	  tree new_temp2 = make_ssa_name (vectype);
	  g = gimple_build_assign (new_temp2, code, v, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

	  v = new_temp2;
	}

      tree new_temp = make_ssa_name (vectype);
      gimple *g = gimple_build_assign (new_temp, code, orig, v);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

      tree last_perm_arg = new_temp;
      /* For exclusive scan, new_temp computed above is the exclusive scan
	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
	 of the last element into orig.  */
      if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
	{
	  last_perm_arg = make_ssa_name (vectype);
	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	}

      orig = make_ssa_name (vectype);
      g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
			       last_perm_arg, perms[units_log2]);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

      if (!inscan_var_store)
	{
	  tree data_ref = fold_build2 (MEM_REF, vectype,
				       unshare_expr (dataref_ptr),
				       dataref_offset);
	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	  g = gimple_build_assign (data_ref, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	}
    }

  if (inscan_var_store)
    for (int j = 0; j < ncopies; j++)
      {
	if (j != 0)
	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

	tree data_ref = fold_build2 (MEM_REF, vectype,
				     unshare_expr (dataref_ptr),
				     dataref_offset);
	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	gimple *g = gimple_build_assign (data_ref, orig);
	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
      }

  return true;
}
/* Function vectorizable_store.

   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
  tree vec_oprnd = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  machine_mode vec_mode;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  stmt_vec_info first_stmt_info;
  unsigned int group_size, i;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  gather_scatter_info gs_info;
  vec_load_store_type vls_type;
8117 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8120 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8124 /* Is vectorizable store? */
8126 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8127 slp_tree mask_node
= NULL
;
8128 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8130 tree scalar_dest
= gimple_assign_lhs (assign
);
8131 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8132 && is_pattern_stmt_p (stmt_info
))
8133 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8134 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8135 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8136 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8137 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8138 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8139 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8140 && TREE_CODE (scalar_dest
) != MEM_REF
)
8145 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8146 if (!call
|| !gimple_call_internal_p (call
))
8149 internal_fn ifn
= gimple_call_internal_fn (call
);
8150 if (!internal_store_fn_p (ifn
))
8153 int mask_index
= internal_fn_mask_index (ifn
);
8154 if (mask_index
>= 0 && slp_node
)
8155 mask_index
= vect_slp_child_index_for_operand
8156 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8158 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8159 &mask
, &mask_node
, &mask_dt
,
8164 /* Cannot have hybrid store SLP -- that would mean storing to the
8165 same location twice. */
8166 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8168 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8169 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8173 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8174 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8179 /* Multiple types in SLP are handled by creating the appropriate number of
8180 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8185 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8187 gcc_assert (ncopies
>= 1);
8189 /* FORNOW. This restriction should be relaxed. */
8190 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
8192 if (dump_enabled_p ())
8193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8194 "multiple types in nested loop.\n");
8200 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8201 &op
, &op_node
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8204 elem_type
= TREE_TYPE (vectype
);
8205 vec_mode
= TYPE_MODE (vectype
);
8207 if (!STMT_VINFO_DATA_REF (stmt_info
))
8210 vect_memory_access_type memory_access_type
;
8211 enum dr_alignment_support alignment_support_scheme
;
8214 internal_fn lanes_ifn
;
8215 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8216 ncopies
, &memory_access_type
, &poffset
,
8217 &alignment_support_scheme
, &misalignment
, &gs_info
,
8223 if (memory_access_type
== VMAT_CONTIGUOUS
)
8225 if (!VECTOR_MODE_P (vec_mode
)
8226 || !can_vec_mask_load_store_p (vec_mode
,
8227 TYPE_MODE (mask_vectype
), false))
8230 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8231 && (memory_access_type
!= VMAT_GATHER_SCATTER
8232 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8234 if (dump_enabled_p ())
8235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8236 "unsupported access type for masked store.\n");
8239 else if (memory_access_type
== VMAT_GATHER_SCATTER
8240 && gs_info
.ifn
== IFN_LAST
8243 if (dump_enabled_p ())
8244 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8245 "unsupported masked emulated scatter.\n");
8251 /* FORNOW. In some cases can vectorize even if data-type not supported
8252 (e.g. - array initialization with 0). */
8253 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8257 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8258 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8259 && memory_access_type
!= VMAT_GATHER_SCATTER
8260 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8263 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8264 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8265 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8269 first_stmt_info
= stmt_info
;
8270 first_dr_info
= dr_info
;
8271 group_size
= vec_num
= 1;
8274 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8276 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8277 memory_access_type
))
8281 bool costing_p
= !vec_stmt
;
8282 if (costing_p
) /* transformation not required. */
8284 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8287 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8288 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8289 vls_type
, group_size
,
8290 memory_access_type
, &gs_info
,
8294 && (!vect_maybe_update_slp_op_vectype (op_node
, vectype
)
8296 && !vect_maybe_update_slp_op_vectype (mask_node
,
8299 if (dump_enabled_p ())
8300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8301 "incompatible vector types for invariants\n");
8305 if (dump_enabled_p ()
8306 && memory_access_type
!= VMAT_ELEMENTWISE
8307 && memory_access_type
!= VMAT_GATHER_SCATTER
8308 && alignment_support_scheme
!= dr_aligned
)
8309 dump_printf_loc (MSG_NOTE
, vect_location
,
8310 "Vectorizing an unaligned access.\n");
8312 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
      /* As function vect_transform_stmt shows, for interleaving stores
	 the whole chain is vectorized only when the last store in the
	 chain is reached; the other stores in the group are skipped.
	 So we want to cost only the last one here.  Getting hold of the
	 last one is not trivial, and since costing the first one is
	 equivalent, use the first one instead.  */
8322 && first_stmt_info
!= stmt_info
)
8325 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8329 ensure_base_align (dr_info
);
8331 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8333 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
);
8337 unsigned int inside_cost
= 0, prologue_cost
= 0;
8338 if (vls_type
== VLS_STORE_INVARIANT
)
8339 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8340 stmt_info
, 0, vect_prologue
);
8341 vect_get_store_cost (vinfo
, stmt_info
, ncopies
,
8342 alignment_support_scheme
, misalignment
,
8343 &inside_cost
, cost_vec
);
8345 if (dump_enabled_p ())
8346 dump_printf_loc (MSG_NOTE
, vect_location
,
8347 "vect_model_store_cost: inside_cost = %d, "
8348 "prologue_cost = %d .\n",
8349 inside_cost
, prologue_cost
);
8353 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8359 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8363 grouped_store
= false;
8364 /* VEC_NUM is the number of vect stmts to be created for this
8366 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8367 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8368 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8369 == first_stmt_info
);
8370 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8371 op
= vect_get_store_rhs (first_stmt_info
);
8374 /* VEC_NUM is the number of vect stmts to be created for this
8376 vec_num
= group_size
;
8378 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8381 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8383 if (!costing_p
&& dump_enabled_p ())
8384 dump_printf_loc (MSG_NOTE
, vect_location
, "transform store. ncopies = %d\n",
8387 /* Check if we need to update prologue cost for invariant,
8388 and update it accordingly if so. If it's not for
8389 interleaving store, we can just check vls_type; but if
8390 it's for interleaving store, need to check the def_type
8391 of the stored value since the current vls_type is just
8392 for first_stmt_info. */
8393 auto update_prologue_cost
= [&](unsigned *prologue_cost
, tree store_rhs
)
8395 gcc_assert (costing_p
);
8400 gcc_assert (store_rhs
);
8401 enum vect_def_type cdt
;
8402 gcc_assert (vect_is_simple_use (store_rhs
, vinfo
, &cdt
));
8403 if (cdt
!= vect_constant_def
&& cdt
!= vect_external_def
)
8406 else if (vls_type
!= VLS_STORE_INVARIANT
)
8408 *prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
,
8412 if (memory_access_type
== VMAT_ELEMENTWISE
8413 || memory_access_type
== VMAT_STRIDED_SLP
)
8415 unsigned inside_cost
= 0, prologue_cost
= 0;
8416 gimple_stmt_iterator incr_gsi
;
8422 tree stride_base
, stride_step
, alias_off
;
8423 tree vec_oprnd
= NULL_TREE
;
8426 /* Checked by get_load_store_type. */
8427 unsigned int const_nunits
= nunits
.to_constant ();
8429 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8430 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8432 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8434 = fold_build_pointer_plus
8435 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8436 size_binop (PLUS_EXPR
,
8437 convert_to_ptrofftype (dr_offset
),
8438 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8439 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...  */
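      /* Illustrative example (not from the original sources): with VF == 4,
	 a V4SI rhs and stride == 3, one iteration of the transformed loop
	 extracts the four lanes of vectemp and stores them to array[j],
	 array[j + 3], array[j + 6] and array[j + 9], after which j is
	 advanced by 4 * 3.  */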
8459 unsigned nstores
= const_nunits
;
8461 tree ltype
= elem_type
;
8462 tree lvectype
= vectype
;
8465 if (group_size
< const_nunits
8466 && const_nunits
% group_size
== 0)
8468 nstores
= const_nunits
/ group_size
;
8470 ltype
= build_vector_type (elem_type
, group_size
);
8473 /* First check if vec_extract optab doesn't support extraction
8474 of vector elts directly. */
8475 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8477 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8478 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8479 group_size
).exists (&vmode
)
8480 || (convert_optab_handler (vec_extract_optab
,
8481 TYPE_MODE (vectype
), vmode
)
8482 == CODE_FOR_nothing
))
8484 /* Try to avoid emitting an extract of vector elements
8485 by performing the extracts using an integer type of the
8486 same size, extracting from a vector of those and then
8487 re-interpreting it as the original vector type if
8490 = group_size
* GET_MODE_BITSIZE (elmode
);
8491 unsigned int lnunits
= const_nunits
/ group_size
;
8492 /* If we can't construct such a vector fall back to
8493 element extracts from the original vector type and
8494 element size stores. */
8495 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8496 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8497 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8498 lnunits
).exists (&vmode
)
8499 && (convert_optab_handler (vec_extract_optab
,
8501 != CODE_FOR_nothing
))
8505 ltype
= build_nonstandard_integer_type (lsize
, 1);
8506 lvectype
= build_vector_type (ltype
, nstores
);
		/* Else fall back to vector extraction anyway.
		   Fewer stores are more important than avoiding spilling
		   of the vector we extract from.  Compared to the
		   construction case in vectorizable_load, no store-forwarding
		   issue exists here for reasonable archs.  */
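		/* Illustrative example (not from the original sources): for
		   group_size == 2 float elements out of a V8SF vector, when
		   the target cannot extract two-element sub-vectors directly,
		   the code above re-interprets the vector as V4DI and stores
		   DImode lanes, so each two-element group is still written
		   with a single store; only if no such integer vector mode
		   exists does it fall back to extracting and storing the
		   elements one by one from the original vector type.  */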
8515 else if (group_size
>= const_nunits
8516 && group_size
% const_nunits
== 0)
8518 int mis_align
= dr_misalignment (first_dr_info
, vectype
);
8519 dr_alignment_support dr_align
8520 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
,
8522 if (dr_align
== dr_aligned
8523 || dr_align
== dr_unaligned_supported
)
8526 lnel
= const_nunits
;
8529 alignment_support_scheme
= dr_align
;
8530 misalignment
= mis_align
;
8533 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8534 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8539 ivstep
= stride_step
;
8540 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8541 build_int_cst (TREE_TYPE (ivstep
), vf
));
8543 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8545 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8546 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8547 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
, loop
, &incr_gsi
,
8548 insert_after
, &offvar
, NULL
);
8549 incr
= gsi_stmt (incr_gsi
);
8551 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8554 alias_off
= build_int_cst (ref_type
, 0);
8555 stmt_vec_info next_stmt_info
= first_stmt_info
;
8556 auto_vec
<tree
> vec_oprnds
;
8557 /* For costing some adjacent vector stores, we'd like to cost with
8558 the total number of them once instead of cost each one by one. */
8559 unsigned int n_adjacent_stores
= 0;
8560 for (g
= 0; g
< group_size
; g
++)
8562 running_off
= offvar
;
8567 tree size
= TYPE_SIZE_UNIT (ltype
);
8569 = fold_build2 (MULT_EXPR
, sizetype
, size_int (g
), size
);
8570 tree newoff
= copy_ssa_name (running_off
, NULL
);
8571 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8573 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8574 running_off
= newoff
;
8578 op
= vect_get_store_rhs (next_stmt_info
);
8580 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
, op
,
8583 update_prologue_cost (&prologue_cost
, op
);
8584 unsigned int group_el
= 0;
8585 unsigned HOST_WIDE_INT
8586 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8587 for (j
= 0; j
< ncopies
; j
++)
8591 vec_oprnd
= vec_oprnds
[j
];
8592 /* Pun the vector to extract from if necessary. */
8593 if (lvectype
!= vectype
)
8595 tree tem
= make_ssa_name (lvectype
);
8597 = build1 (VIEW_CONVERT_EXPR
, lvectype
, vec_oprnd
);
8598 gimple
*pun
= gimple_build_assign (tem
, cvt
);
8599 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8603 for (i
= 0; i
< nstores
; i
++)
8607 /* Only need vector extracting when there are more
8611 += record_stmt_cost (cost_vec
, 1, vec_to_scalar
,
8612 stmt_info
, 0, vect_body
);
8613 /* Take a single lane vector type store as scalar
8614 store to avoid ICE like 110776. */
8615 if (VECTOR_TYPE_P (ltype
)
8616 && known_ne (TYPE_VECTOR_SUBPARTS (ltype
), 1U))
8617 n_adjacent_stores
++;
8620 += record_stmt_cost (cost_vec
, 1, scalar_store
,
8621 stmt_info
, 0, vect_body
);
8624 tree newref
, newoff
;
8625 gimple
*incr
, *assign
;
8626 tree size
= TYPE_SIZE (ltype
);
8627 /* Extract the i'th component. */
8628 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8629 bitsize_int (i
), size
);
8630 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8633 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8637 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8639 newref
= build2 (MEM_REF
, ltype
,
8640 running_off
, this_off
);
8641 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8643 /* And store it to *running_off. */
8644 assign
= gimple_build_assign (newref
, elem
);
8645 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8649 || group_el
== group_size
)
8651 newoff
= copy_ssa_name (running_off
, NULL
);
8652 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8653 running_off
, stride_step
);
8654 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8656 running_off
= newoff
;
8659 if (g
== group_size
- 1
8662 if (j
== 0 && i
== 0)
8664 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8668 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8669 vec_oprnds
.truncate(0);
8676 if (n_adjacent_stores
> 0)
8677 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8678 alignment_support_scheme
, misalignment
,
8679 &inside_cost
, cost_vec
);
8680 if (dump_enabled_p ())
8681 dump_printf_loc (MSG_NOTE
, vect_location
,
8682 "vect_model_store_cost: inside_cost = %d, "
8683 "prologue_cost = %d .\n",
8684 inside_cost
, prologue_cost
);
8690 gcc_assert (alignment_support_scheme
);
8691 vec_loop_masks
*loop_masks
8692 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8693 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8695 vec_loop_lens
*loop_lens
8696 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8697 ? &LOOP_VINFO_LENS (loop_vinfo
)
  /* Both vect_transform_stmt and vect_analyze_stmt go through here, but
     with one difference: we cannot enable both lens and masks during the
     transform, while during analysis that is allowed.  We shouldn't use the
     length-based approach if the loop is fully masked.  */
  if (cost_vec == NULL)
    /* The cost_vec is NULL during the transform.  */
    gcc_assert ((!loop_lens || !loop_masks));
8708 /* Targets with store-lane instructions must not require explicit
8709 realignment. vect_supportable_dr_alignment always returns either
8710 dr_aligned or dr_unaligned_supported for masked operations. */
8711 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8714 || alignment_support_scheme
== dr_aligned
8715 || alignment_support_scheme
== dr_unaligned_supported
);
8717 tree offset
= NULL_TREE
;
8718 if (!known_eq (poffset
, 0))
8719 offset
= size_int (poffset
);
8722 tree vec_offset
= NULL_TREE
;
8723 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8725 aggr_type
= NULL_TREE
;
8728 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8730 aggr_type
= elem_type
;
8732 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8733 &bump
, &vec_offset
, loop_lens
);
8737 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8738 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8740 aggr_type
= vectype
;
8741 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8742 memory_access_type
, loop_lens
);
8745 if (mask
&& !costing_p
)
8746 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
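  /* Illustrative example (not from the original sources): for a group of
     two interleaved stores a[i].x = ... and a[i].y = ... with V4SI vectors,
     vect_permute_store_chain turns the two vectorized defs {x0,x1,x2,x3}
     and {y0,y1,y2,y3} into {x0,y0,x1,y1} and {x2,y2,x3,y3} using
     VEC_PERM_EXPRs with the selectors {0,4,1,5} and {2,6,3,7}; those two
     vectors are what actually gets stored back to back.  */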
8786 auto_vec
<tree
> dr_chain (group_size
);
8787 auto_vec
<tree
> vec_masks
;
8788 tree vec_mask
= NULL
;
8789 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8790 for (i
= 0; i
< group_size
; i
++)
8791 gvec_oprnds
.quick_push (new auto_vec
<tree
> ());
8793 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8795 gcc_assert (!slp
&& grouped_store
);
8796 unsigned inside_cost
= 0, prologue_cost
= 0;
8797 /* For costing some adjacent vector stores, we'd like to cost with
8798 the total number of them once instead of cost each one by one. */
8799 unsigned int n_adjacent_stores
= 0;
8800 for (j
= 0; j
< ncopies
; j
++)
8805 /* For interleaved stores we collect vectorized defs for all
8806 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8807 as an input to vect_permute_store_chain(). */
8808 stmt_vec_info next_stmt_info
= first_stmt_info
;
8809 for (i
= 0; i
< group_size
; i
++)
8811 /* Since gaps are not supported for interleaved stores,
8812 DR_GROUP_SIZE is the exact number of stmts in the
8813 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8814 op
= vect_get_store_rhs (next_stmt_info
);
8816 update_prologue_cost (&prologue_cost
, op
);
8819 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8822 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8823 dr_chain
.quick_push (vec_oprnd
);
8825 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8832 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8835 vec_mask
= vec_masks
[0];
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (
		useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8842 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8843 aggr_type
, NULL
, offset
, &dummy
,
8844 gsi
, &ptr_incr
, false, bump
);
8847 else if (!costing_p
)
8849 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8850 /* DR_CHAIN is then used as an input to
8851 vect_permute_store_chain(). */
8852 for (i
= 0; i
< group_size
; i
++)
8854 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8855 dr_chain
[i
] = vec_oprnd
;
8858 vec_mask
= vec_masks
[j
];
8859 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8865 n_adjacent_stores
+= vec_num
;
8869 /* Get an array into which we can store the individual vectors. */
8870 tree vec_array
= create_vector_array (vectype
, vec_num
);
8872 /* Invalidate the current contents of VEC_ARRAY. This should
8873 become an RTL clobber too, which prevents the vector registers
8874 from being upward-exposed. */
8875 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8877 /* Store the individual vectors into the array. */
8878 for (i
= 0; i
< vec_num
; i
++)
8880 vec_oprnd
= dr_chain
[i
];
8881 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8885 tree final_mask
= NULL
;
8886 tree final_len
= NULL
;
8889 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8890 ncopies
, vectype
, j
);
8892 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8895 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8898 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8899 ncopies
, vectype
, j
, 1);
8901 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8903 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8904 bias
= build_int_cst (intQI_type_node
, biasval
);
8907 mask_vectype
= truth_type_for (vectype
);
8908 final_mask
= build_minus_one_cst (mask_vectype
);
8913 if (final_len
&& final_mask
)
8916 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8917 LEN, BIAS, VEC_ARRAY). */
8918 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8919 tree alias_ptr
= build_int_cst (ref_type
, align
);
8920 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
8921 dataref_ptr
, alias_ptr
,
8922 final_mask
, final_len
, bias
,
8925 else if (final_mask
)
8928 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8930 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8931 tree alias_ptr
= build_int_cst (ref_type
, align
);
8932 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8933 dataref_ptr
, alias_ptr
,
8934 final_mask
, vec_array
);
8939 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8940 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8941 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
8942 gimple_call_set_lhs (call
, data_ref
);
8944 gimple_call_set_nothrow (call
, true);
8945 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8948 /* Record that VEC_ARRAY is now dead. */
8949 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8951 *vec_stmt
= new_stmt
;
8952 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8957 if (n_adjacent_stores
> 0)
8958 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8959 alignment_support_scheme
, misalignment
,
8960 &inside_cost
, cost_vec
);
8961 if (dump_enabled_p ())
8962 dump_printf_loc (MSG_NOTE
, vect_location
,
8963 "vect_model_store_cost: inside_cost = %d, "
8964 "prologue_cost = %d .\n",
8965 inside_cost
, prologue_cost
);
8971 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8973 gcc_assert (!grouped_store
);
8974 auto_vec
<tree
> vec_offsets
;
8975 unsigned int inside_cost
= 0, prologue_cost
= 0;
8976 for (j
= 0; j
< ncopies
; j
++)
8981 if (costing_p
&& vls_type
== VLS_STORE_INVARIANT
)
8982 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8983 stmt_info
, 0, vect_prologue
);
8984 else if (!costing_p
)
8986 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8987 DR_CHAIN is of size 1. */
8988 gcc_assert (group_size
== 1);
8990 vect_get_slp_defs (op_node
, gvec_oprnds
[0]);
8992 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
,
8993 ncopies
, op
, gvec_oprnds
[0]);
8997 vect_get_slp_defs (mask_node
, &vec_masks
);
8999 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
9005 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9006 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9008 &dataref_ptr
, &vec_offsets
);
9011 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
9012 aggr_type
, NULL
, offset
,
9013 &dummy
, gsi
, &ptr_incr
, false,
9017 else if (!costing_p
)
9019 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9020 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9021 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9022 gsi
, stmt_info
, bump
);
9026 for (i
= 0; i
< vec_num
; ++i
)
9030 vec_oprnd
= (*gvec_oprnds
[0])[vec_num
* j
+ i
];
9032 vec_mask
= vec_masks
[vec_num
* j
+ i
];
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (useless_type_conversion_p (vectype,
						     TREE_TYPE (vec_oprnd)));
)));
9037 unsigned HOST_WIDE_INT align
;
9038 tree final_mask
= NULL_TREE
;
9039 tree final_len
= NULL_TREE
;
9040 tree bias
= NULL_TREE
;
9044 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
,
9045 loop_masks
, ncopies
,
9048 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9049 final_mask
, vec_mask
, gsi
);
9052 if (gs_info
.ifn
!= IFN_LAST
)
9056 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9058 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9059 stmt_info
, 0, vect_body
);
9063 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9064 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9065 tree scale
= size_int (gs_info
.scale
);
9067 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
9070 final_len
= vect_get_loop_len (loop_vinfo
, gsi
,
9074 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9076 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9077 bias
= build_int_cst (intQI_type_node
, biasval
);
9080 mask_vectype
= truth_type_for (vectype
);
9081 final_mask
= build_minus_one_cst (mask_vectype
);
9086 if (final_len
&& final_mask
)
9087 call
= gimple_build_call_internal
9088 (IFN_MASK_LEN_SCATTER_STORE
, 7, dataref_ptr
,
9089 vec_offset
, scale
, vec_oprnd
, final_mask
,
9091 else if (final_mask
)
9092 call
= gimple_build_call_internal
9093 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
,
9094 vec_offset
, scale
, vec_oprnd
, final_mask
);
9096 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9097 dataref_ptr
, vec_offset
,
9099 gimple_call_set_nothrow (call
, true);
9100 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9103 else if (gs_info
.decl
)
9105 /* The builtin decls path for scatter is legacy, x86 only. */
9106 gcc_assert (nunits
.is_constant ()
9108 || SCALAR_INT_MODE_P
9109 (TYPE_MODE (TREE_TYPE (final_mask
)))));
9112 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9114 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9115 stmt_info
, 0, vect_body
);
9118 poly_uint64 offset_nunits
9119 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
9120 if (known_eq (nunits
, offset_nunits
))
9122 new_stmt
= vect_build_one_scatter_store_call
9123 (vinfo
, stmt_info
, gsi
, &gs_info
,
9124 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
9125 vec_oprnd
, final_mask
);
9126 vect_finish_stmt_generation (vinfo
, stmt_info
,
9129 else if (known_eq (nunits
, offset_nunits
* 2))
9131 /* We have a offset vector with half the number of
9132 lanes but the builtins will store full vectype
9133 data from the lower lanes. */
9134 new_stmt
= vect_build_one_scatter_store_call
9135 (vinfo
, stmt_info
, gsi
, &gs_info
,
9137 vec_offsets
[2 * vec_num
* j
+ 2 * i
],
9138 vec_oprnd
, final_mask
);
9139 vect_finish_stmt_generation (vinfo
, stmt_info
,
9141 int count
= nunits
.to_constant ();
9142 vec_perm_builder
sel (count
, count
, 1);
9143 sel
.quick_grow (count
);
9144 for (int i
= 0; i
< count
; ++i
)
9145 sel
[i
] = i
| (count
/ 2);
9146 vec_perm_indices
indices (sel
, 2, count
);
9148 = vect_gen_perm_mask_checked (vectype
, indices
);
9149 new_stmt
= gimple_build_assign (NULL_TREE
, VEC_PERM_EXPR
,
9150 vec_oprnd
, vec_oprnd
,
9152 vec_oprnd
= make_ssa_name (vectype
);
9153 gimple_set_lhs (new_stmt
, vec_oprnd
);
9154 vect_finish_stmt_generation (vinfo
, stmt_info
,
9158 new_stmt
= gimple_build_assign (NULL_TREE
,
9161 final_mask
= make_ssa_name
9162 (truth_type_for (gs_info
.offset_vectype
));
9163 gimple_set_lhs (new_stmt
, final_mask
);
9164 vect_finish_stmt_generation (vinfo
, stmt_info
,
9167 new_stmt
= vect_build_one_scatter_store_call
9168 (vinfo
, stmt_info
, gsi
, &gs_info
,
9170 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
9171 vec_oprnd
, final_mask
);
9172 vect_finish_stmt_generation (vinfo
, stmt_info
,
9175 else if (known_eq (nunits
* 2, offset_nunits
))
9177 /* We have a offset vector with double the number of
9178 lanes. Select the low/high part accordingly. */
9179 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
9180 if ((vec_num
* j
+ i
) & 1)
9182 int count
= offset_nunits
.to_constant ();
9183 vec_perm_builder
sel (count
, count
, 1);
9184 sel
.quick_grow (count
);
9185 for (int i
= 0; i
< count
; ++i
)
9186 sel
[i
] = i
| (count
/ 2);
9187 vec_perm_indices
indices (sel
, 2, count
);
9188 tree perm_mask
= vect_gen_perm_mask_checked
9189 (TREE_TYPE (vec_offset
), indices
);
9190 new_stmt
= gimple_build_assign (NULL_TREE
,
9195 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
9196 gimple_set_lhs (new_stmt
, vec_offset
);
9197 vect_finish_stmt_generation (vinfo
, stmt_info
,
9200 new_stmt
= vect_build_one_scatter_store_call
9201 (vinfo
, stmt_info
, gsi
, &gs_info
,
9202 dataref_ptr
, vec_offset
,
9203 vec_oprnd
, final_mask
);
9204 vect_finish_stmt_generation (vinfo
, stmt_info
,
9212 /* Emulated scatter. */
9213 gcc_assert (!final_mask
);
9216 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9217 /* For emulated scatter N offset vector element extracts
9218 (we assume the scalar scaling and ptr + offset add is
9219 consumed by the load). */
9221 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9222 stmt_info
, 0, vect_body
);
9223 /* N scalar stores plus extracting the elements. */
9225 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9226 stmt_info
, 0, vect_body
);
9228 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9229 stmt_info
, 0, vect_body
);
9233 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9234 unsigned HOST_WIDE_INT const_offset_nunits
9235 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9236 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9237 vec_alloc (ctor_elts
, const_nunits
);
9238 gimple_seq stmts
= NULL
;
9239 tree elt_type
= TREE_TYPE (vectype
);
9240 unsigned HOST_WIDE_INT elt_size
9241 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9242 /* We support offset vectors with more elements
9243 than the data vector for now. */
9244 unsigned HOST_WIDE_INT factor
9245 = const_offset_nunits
/ const_nunits
;
9246 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
9248 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
9249 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9250 tree scale
= size_int (gs_info
.scale
);
9251 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9252 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9253 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9255 /* Compute the offsetted pointer. */
9256 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9257 bitsize_int (k
+ elt_offset
));
9259 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
9260 vec_offset
, TYPE_SIZE (idx_type
), boff
);
9261 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9262 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
,
9265 = gimple_build (&stmts
, PLUS_EXPR
,
9266 TREE_TYPE (dataref_ptr
),
9268 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9269 /* Extract the element to be stored. */
9271 = gimple_build (&stmts
, BIT_FIELD_REF
,
9272 TREE_TYPE (vectype
),
9273 vec_oprnd
, TYPE_SIZE (elt_type
),
9274 bitsize_int (k
* elt_size
));
9275 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9278 = build2 (MEM_REF
, ltype
, ptr
,
9279 build_int_cst (ref_type
, 0));
9280 new_stmt
= gimple_build_assign (ref
, elt
);
9281 vect_finish_stmt_generation (vinfo
, stmt_info
,
9285 slp_node
->push_vec_def (new_stmt
);
9288 if (!slp
&& !costing_p
)
9289 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9292 if (!slp
&& !costing_p
)
9293 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9295 if (costing_p
&& dump_enabled_p ())
9296 dump_printf_loc (MSG_NOTE
, vect_location
,
9297 "vect_model_store_cost: inside_cost = %d, "
9298 "prologue_cost = %d .\n",
9299 inside_cost
, prologue_cost
);
9304 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
9305 || memory_access_type
== VMAT_CONTIGUOUS_DOWN
9306 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
9307 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
);
9309 unsigned inside_cost
= 0, prologue_cost
= 0;
9310 /* For costing some adjacent vector stores, we'd like to cost with
9311 the total number of them once instead of cost each one by one. */
9312 unsigned int n_adjacent_stores
= 0;
9313 auto_vec
<tree
> result_chain (group_size
);
9314 auto_vec
<tree
, 1> vec_oprnds
;
9315 for (j
= 0; j
< ncopies
; j
++)
9320 if (slp
&& !costing_p
)
9322 /* Get vectorized arguments for SLP_NODE. */
9323 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9324 &vec_oprnds
, mask
, &vec_masks
);
9325 vec_oprnd
= vec_oprnds
[0];
9327 vec_mask
= vec_masks
[0];
9331 /* For interleaved stores we collect vectorized defs for all the
9332 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9333 input to vect_permute_store_chain().
9335 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9337 stmt_vec_info next_stmt_info
= first_stmt_info
;
9338 for (i
= 0; i
< group_size
; i
++)
9340 /* Since gaps are not supported for interleaved stores,
9341 DR_GROUP_SIZE is the exact number of stmts in the chain.
9342 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9343 that there is no interleaving, DR_GROUP_SIZE is 1,
9344 and only one iteration of the loop will be executed. */
9345 op
= vect_get_store_rhs (next_stmt_info
);
9347 update_prologue_cost (&prologue_cost
, op
);
9350 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
9353 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9354 dr_chain
.quick_push (vec_oprnd
);
9356 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9358 if (mask
&& !costing_p
)
9360 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9363 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (costing_p
		      || useless_type_conversion_p (vectype,
						    TREE_TYPE (vec_oprnd)));
)));
9371 bool simd_lane_access_p
9372 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9374 && simd_lane_access_p
9376 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9377 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9378 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9379 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9380 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9381 get_alias_set (TREE_TYPE (ref_type
))))
9383 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9384 dataref_offset
= build_int_cst (ref_type
, 0);
9386 else if (!costing_p
)
9388 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9389 simd_lane_access_p
? loop
: NULL
,
9390 offset
, &dummy
, gsi
, &ptr_incr
,
9391 simd_lane_access_p
, bump
);
9393 else if (!costing_p
)
9395 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9396 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9397 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9399 for (i
= 0; i
< group_size
; i
++)
9401 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9402 dr_chain
[i
] = vec_oprnd
;
9405 vec_mask
= vec_masks
[j
];
9407 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9409 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9417 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
9420 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
9421 int nstmts
= ceil_log2 (group_size
) * group_size
;
9422 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
9423 stmt_info
, 0, vect_body
);
9424 if (dump_enabled_p ())
9425 dump_printf_loc (MSG_NOTE
, vect_location
,
9426 "vect_model_store_cost: "
9427 "strided group_size = %d .\n",
9431 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
9432 gsi
, &result_chain
);
9435 stmt_vec_info next_stmt_info
= first_stmt_info
;
9436 for (i
= 0; i
< vec_num
; i
++)
9441 vec_oprnd
= vec_oprnds
[i
];
9442 else if (grouped_store
)
9443 /* For grouped stores vectorized defs are interleaved in
9444 vect_permute_store_chain(). */
9445 vec_oprnd
= result_chain
[i
];
9448 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9451 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_perm
,
9452 stmt_info
, 0, vect_body
);
9455 tree perm_mask
= perm_mask_for_reverse (vectype
);
9456 tree perm_dest
= vect_create_destination_var (
9457 vect_get_store_rhs (stmt_info
), vectype
);
9458 tree new_temp
= make_ssa_name (perm_dest
);
9460 /* Generate the permute statement. */
9462 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9463 vec_oprnd
, perm_mask
);
9464 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
9467 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9468 vec_oprnd
= new_temp
;
9474 n_adjacent_stores
++;
9478 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9479 if (!next_stmt_info
)
9486 tree final_mask
= NULL_TREE
;
9487 tree final_len
= NULL_TREE
;
9488 tree bias
= NULL_TREE
;
9490 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9491 vec_num
* ncopies
, vectype
,
9493 if (slp
&& vec_mask
)
9494 vec_mask
= vec_masks
[i
];
9496 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9500 /* Bump the vector pointer. */
9501 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9505 unsigned HOST_WIDE_INT align
;
9506 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9507 if (alignment_support_scheme
== dr_aligned
)
9509 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9511 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9515 misalign
= misalignment
;
9516 if (dataref_offset
== NULL_TREE
9517 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9518 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9520 align
= least_bit_hwi (misalign
| align
);
9522 /* Compute IFN when LOOP_LENS or final_mask valid. */
9523 machine_mode vmode
= TYPE_MODE (vectype
);
9524 machine_mode new_vmode
= vmode
;
9525 internal_fn partial_ifn
= IFN_LAST
;
9528 opt_machine_mode new_ovmode
9529 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9530 new_vmode
= new_ovmode
.require ();
9532 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9533 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9534 vec_num
* ncopies
, vectype
,
9535 vec_num
* j
+ i
, factor
);
9537 else if (final_mask
)
9539 if (!can_vec_mask_load_store_p (
9540 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9545 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9549 /* Pass VF value to 'len' argument of
9550 MASK_LEN_STORE if LOOP_LENS is invalid. */
9551 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9555 /* Pass all ones value to 'mask' argument of
9556 MASK_LEN_STORE if final_mask is invalid. */
9557 mask_vectype
= truth_type_for (vectype
);
9558 final_mask
= build_minus_one_cst (mask_vectype
);
9564 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9566 bias
= build_int_cst (intQI_type_node
, biasval
);
9569 /* Arguments are ready. Create the new vector stmt. */
9573 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9574 /* Need conversion if it's wrapped with VnQI. */
9575 if (vmode
!= new_vmode
)
9578 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9580 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9581 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9583 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9584 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9588 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9589 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9590 dataref_ptr
, ptr
, final_mask
,
9591 final_len
, bias
, vec_oprnd
);
9593 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9594 dataref_ptr
, ptr
, final_len
,
9596 gimple_call_set_nothrow (call
, true);
9597 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9600 else if (final_mask
)
9602 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9604 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9605 ptr
, final_mask
, vec_oprnd
);
9606 gimple_call_set_nothrow (call
, true);
9607 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9613 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9614 dataref_offset
? dataref_offset
9615 : build_int_cst (ref_type
, 0));
9616 if (alignment_support_scheme
== dr_aligned
)
9619 TREE_TYPE (data_ref
)
9620 = build_aligned_type (TREE_TYPE (data_ref
),
9621 align
* BITS_PER_UNIT
);
9622 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9623 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9624 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9630 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9631 if (!next_stmt_info
)
9634 if (!slp
&& !costing_p
)
9637 *vec_stmt
= new_stmt
;
9638 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9644 if (n_adjacent_stores
> 0)
9645 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9646 alignment_support_scheme
, misalignment
,
9647 &inside_cost
, cost_vec
);
9649 /* When vectorizing a store into the function result assign
9650 a penalty if the function returns in a multi-register location.
9651 In this case we assume we'll end up with having to spill the
9652 vector result and do piecewise loads as a conservative estimate. */
9653 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
9655 && (TREE_CODE (base
) == RESULT_DECL
9656 || (DECL_P (base
) && cfun_returns (base
)))
9657 && !aggregate_value_p (base
, cfun
->decl
))
9659 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
9660 /* ??? Handle PARALLEL in some way. */
9663 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
9664 /* Assume that a single reg-reg move is possible and cheap,
9665 do not account for vector to gp register move cost. */
9670 += record_stmt_cost (cost_vec
, ncopies
, vector_store
,
9671 stmt_info
, 0, vect_epilogue
);
9674 += record_stmt_cost (cost_vec
, ncopies
* nregs
, scalar_load
,
9675 stmt_info
, 0, vect_epilogue
);
9679 if (dump_enabled_p ())
9680 dump_printf_loc (MSG_NOTE
, vect_location
,
9681 "vect_model_store_cost: inside_cost = %d, "
9682 "prologue_cost = %d .\n",
9683 inside_cost
, prologue_cost
);
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
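/* Illustrative example: for a four-element vector type and
   SEL = { 0, 2, 4, 6 } the result is the VECTOR_CST { 0, 2, 4, 6 } with
   ssizetype elements; used as the third operand of a VEC_PERM_EXPR it
   selects the even elements from the concatenation of the two input
   vectors.  */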
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  machine_mode vmode = TYPE_MODE (vectype);
  gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
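/* Typical usage, sketched after the callers later in this file (COUNT and
   VECTYPE stand for the caller's element count and vector type):

     vec_perm_builder sel (count, count, 1);
     sel.quick_grow (count);
     for (int i = 0; i < count; ++i)
       sel[i] = i | (count / 2);
     vec_perm_indices indices (sel, 2, count);
     tree perm_mask = vect_gen_perm_mask_checked (vectype, indices);

   With both VEC_PERM_EXPR inputs set to the same vector, this particular
   mask makes the vector's upper half available in its lower lanes.  */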
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
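/* For illustration: the statement generated above has the form

     data_ref_N = VEC_PERM_EXPR <X, Y, MASK_VEC>;

   where data_ref_N is a fresh SSA name with X's vector type; that name is
   what the function returns.  */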
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can then be moved),
   otherwise returns false.  HOIST_P indicates whether we actually want to
   hoist the definitions of all SSA uses; it is false when we are only
   costing.  */
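/* Illustrative example: for a loop-invariant load  x = *p  whose address
   p  is defined inside the loop as  p = q_1 + 16  with  q_1  defined
   outside of it, the definition of  p  is moved onto the preheader edge so
   that the load itself can subsequently be hoisted as well (see the
   VMAT_INVARIANT handling in vectorizable_load below).  */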
9752 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
, bool hoist_p
)
9758 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9760 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
9761 if (!gimple_nop_p (def_stmt
)
9762 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases, when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
9770 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
9772 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
9774 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
9775 if (!gimple_nop_p (def_stmt2
)
9776 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
9789 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9791 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
9792 if (!gimple_nop_p (def_stmt
)
9793 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9795 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
9796 gsi_remove (&gsi
, false);
9797 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
9813 vectorizable_load (vec_info
*vinfo
,
9814 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9815 gimple
**vec_stmt
, slp_tree slp_node
,
9816 stmt_vector_for_cost
*cost_vec
)
9819 tree vec_dest
= NULL
;
9820 tree data_ref
= NULL
;
9821 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9822 class loop
*loop
= NULL
;
9823 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9824 bool nested_in_vect_loop
= false;
9826 /* Avoid false positive uninitialized warning, see PR110652. */
9827 tree new_temp
= NULL_TREE
;
9830 tree dataref_ptr
= NULL_TREE
;
9831 tree dataref_offset
= NULL_TREE
;
9832 gimple
*ptr_incr
= NULL
;
9835 unsigned int group_size
;
9836 poly_uint64 group_gap_adj
;
9837 tree msq
= NULL_TREE
, lsq
;
9838 tree realignment_token
= NULL_TREE
;
9840 vec
<tree
> dr_chain
= vNULL
;
9841 bool grouped_load
= false;
9842 stmt_vec_info first_stmt_info
;
9843 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9844 bool compute_in_loop
= false;
9845 class loop
*at_loop
;
9847 bool slp
= (slp_node
!= NULL
);
9848 bool slp_perm
= false;
9849 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9852 gather_scatter_info gs_info
;
9854 enum vect_def_type mask_dt
= vect_unknown_def_type
;
9856 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9859 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9863 if (!STMT_VINFO_DATA_REF (stmt_info
))
9866 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
9867 int mask_index
= -1;
9868 slp_tree slp_op
= NULL
;
9869 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
9871 scalar_dest
= gimple_assign_lhs (assign
);
9872 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
9875 tree_code code
= gimple_assign_rhs_code (assign
);
9876 if (code
!= ARRAY_REF
9877 && code
!= BIT_FIELD_REF
9878 && code
!= INDIRECT_REF
9879 && code
!= COMPONENT_REF
9880 && code
!= IMAGPART_EXPR
9881 && code
!= REALPART_EXPR
9883 && TREE_CODE_CLASS (code
) != tcc_declaration
)
9888 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9889 if (!call
|| !gimple_call_internal_p (call
))
9892 internal_fn ifn
= gimple_call_internal_fn (call
);
9893 if (!internal_load_fn_p (ifn
))
9896 scalar_dest
= gimple_call_lhs (call
);
9900 mask_index
= internal_fn_mask_index (ifn
);
9901 if (mask_index
>= 0 && slp_node
)
9902 mask_index
= vect_slp_child_index_for_operand
9903 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9905 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
9906 &mask
, &slp_op
, &mask_dt
, &mask_vectype
))
9910 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9911 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9915 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
9916 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
9917 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
9928 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9930 gcc_assert (ncopies
>= 1);
9932 /* FORNOW. This restriction should be relaxed. */
9933 if (nested_in_vect_loop
&& ncopies
> 1)
9935 if (dump_enabled_p ())
9936 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9937 "multiple types in nested loop.\n");
9941 /* Invalidate assumptions made by dependence analysis when vectorization
9942 on the unrolled body effectively re-orders stmts. */
9944 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9945 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9946 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9948 if (dump_enabled_p ())
9949 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9950 "cannot perform implicit CSE when unrolling "
9951 "with negative dependence distance\n");
9955 elem_type
= TREE_TYPE (vectype
);
9956 mode
= TYPE_MODE (vectype
);
  /* FORNOW.  In some cases we can vectorize even if the data-type is not
     supported (e.g. data copies).  */
9960 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9962 if (dump_enabled_p ())
9963 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9964 "Aligned load, but unsupported type.\n");
9968 /* Check if the load is a part of an interleaving chain. */
9969 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9971 grouped_load
= true;
9973 gcc_assert (!nested_in_vect_loop
);
9974 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9976 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9977 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9979 /* Refuse non-SLP vectorization of SLP-only groups. */
9980 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9982 if (dump_enabled_p ())
9983 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9984 "cannot vectorize load in non-SLP mode.\n");
9988 /* Invalidate assumptions made by dependence analysis when vectorization
9989 on the unrolled body effectively re-orders stmts. */
9990 if (!PURE_SLP_STMT (stmt_info
)
9991 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9992 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9993 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9995 if (dump_enabled_p ())
9996 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9997 "cannot perform implicit CSE when performing "
9998 "group loads with negative dependence distance\n");
10005 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10011 /* In BB vectorization we may not actually use a loaded vector
10012 accessing elements in excess of DR_GROUP_SIZE. */
10013 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10014 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
10015 unsigned HOST_WIDE_INT nunits
;
10016 unsigned j
, k
, maxk
= 0;
10017 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
10020 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
10021 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
10022 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
10024 if (dump_enabled_p ())
10025 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10026 "BB vectorization with gaps at the end of "
10027 "a load is not supported\n");
10032 auto_vec
<tree
> tem
;
10034 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
10037 if (dump_enabled_p ())
10038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
10040 "unsupported load permutation\n");
10045 vect_memory_access_type memory_access_type
;
10046 enum dr_alignment_support alignment_support_scheme
;
10048 poly_int64 poffset
;
10049 internal_fn lanes_ifn
;
10050 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
10051 ncopies
, &memory_access_type
, &poffset
,
10052 &alignment_support_scheme
, &misalignment
, &gs_info
,
10058 if (memory_access_type
== VMAT_CONTIGUOUS
)
10060 machine_mode vec_mode
= TYPE_MODE (vectype
);
10061 if (!VECTOR_MODE_P (vec_mode
)
10062 || !can_vec_mask_load_store_p (vec_mode
,
10063 TYPE_MODE (mask_vectype
), true))
10066 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
10067 && memory_access_type
!= VMAT_GATHER_SCATTER
)
10069 if (dump_enabled_p ())
10070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10071 "unsupported access type for masked load.\n");
10074 else if (memory_access_type
== VMAT_GATHER_SCATTER
10075 && gs_info
.ifn
== IFN_LAST
10078 if (dump_enabled_p ())
10079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10080 "unsupported masked emulated gather.\n");
10085 bool costing_p
= !vec_stmt
;
10087 if (costing_p
) /* transformation not required. */
10091 && !vect_maybe_update_slp_op_vectype (slp_op
,
10094 if (dump_enabled_p ())
10095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10096 "incompatible vector types for invariants\n");
10101 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
10104 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10105 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
10106 VLS_LOAD
, group_size
,
10107 memory_access_type
, &gs_info
,
10110 if (dump_enabled_p ()
10111 && memory_access_type
!= VMAT_ELEMENTWISE
10112 && memory_access_type
!= VMAT_GATHER_SCATTER
10113 && alignment_support_scheme
!= dr_aligned
)
10114 dump_printf_loc (MSG_NOTE
, vect_location
,
10115 "Vectorizing an unaligned access.\n");
10117 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10118 vinfo
->any_known_not_updated_vssa
= true;
10120 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
10124 gcc_assert (memory_access_type
10125 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
10127 if (dump_enabled_p () && !costing_p
)
10128 dump_printf_loc (MSG_NOTE
, vect_location
,
10129 "transform load. ncopies = %d\n", ncopies
);
10133 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
10134 ensure_base_align (dr_info
);
10136 if (memory_access_type
== VMAT_INVARIANT
)
10138 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.
	 TODO: hoist_defs_of_uses should ideally be computed
	 once at analysis time, remembered and used in the
	 transform phase.  */
10146 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
10147 && !nested_in_vect_loop
10148 && hoist_defs_of_uses (stmt_info
, loop
, !costing_p
));
10151 enum vect_cost_model_location cost_loc
10152 = hoist_p
? vect_prologue
: vect_body
;
10153 unsigned int cost
= record_stmt_cost (cost_vec
, 1, scalar_load
,
10154 stmt_info
, 0, cost_loc
);
10155 cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
, 0,
10157 unsigned int prologue_cost
= hoist_p
? cost
: 0;
10158 unsigned int inside_cost
= hoist_p
? 0 : cost
;
10159 if (dump_enabled_p ())
10160 dump_printf_loc (MSG_NOTE
, vect_location
,
10161 "vect_model_load_cost: inside_cost = %d, "
10162 "prologue_cost = %d .\n",
10163 inside_cost
, prologue_cost
);
10168 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
10169 if (dump_enabled_p ())
10170 dump_printf_loc (MSG_NOTE
, vect_location
,
10171 "hoisting out of the vectorized loop: %G",
10173 scalar_dest
= copy_ssa_name (scalar_dest
);
10174 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
10175 edge pe
= loop_preheader_edge (loop
);
10176 gphi
*vphi
= get_virtual_phi (loop
->header
);
10179 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
10181 vuse
= gimple_vuse (gsi_stmt (*gsi
));
10182 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
10183 gimple_set_vuse (new_stmt
, vuse
);
10184 gsi_insert_on_edge_immediate (pe
, new_stmt
);
10186 /* These copies are all equivalent. */
10188 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10192 gimple_stmt_iterator gsi2
= *gsi
;
10194 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10197 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10199 for (j
= 0; j
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
); ++j
)
10200 slp_node
->push_vec_def (new_stmt
);
10203 for (j
= 0; j
< ncopies
; ++j
)
10204 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10205 *vec_stmt
= new_stmt
;
10210 if (memory_access_type
== VMAT_ELEMENTWISE
10211 || memory_access_type
== VMAT_STRIDED_SLP
)
10213 gimple_stmt_iterator incr_gsi
;
10218 vec
<constructor_elt
, va_gc
> *v
= NULL
;
10219 tree stride_base
, stride_step
, alias_off
;
10220 /* Checked by get_load_store_type. */
10221 unsigned int const_nunits
= nunits
.to_constant ();
10222 unsigned HOST_WIDE_INT cst_offset
= 0;
10224 unsigned int inside_cost
= 0;
10226 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
10227 gcc_assert (!nested_in_vect_loop
);
10231 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10232 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10236 first_stmt_info
= stmt_info
;
10237 first_dr_info
= dr_info
;
10240 if (slp
&& grouped_load
)
10242 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10243 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10249 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
10250 * vect_get_place_in_interleaving_chain (stmt_info
,
10253 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
10258 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
10259 stride_base
= fold_build_pointer_plus (
10260 DR_BASE_ADDRESS (first_dr_info
->dr
),
10261 size_binop (PLUS_EXPR
, convert_to_ptrofftype (dr_offset
),
10262 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
10263 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}  */
10281 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
10282 build_int_cst (TREE_TYPE (stride_step
), vf
));
10284 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
10286 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
10287 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
10288 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
10289 loop
, &incr_gsi
, insert_after
,
10292 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
10295 running_off
= offvar
;
10296 alias_off
= build_int_cst (ref_type
, 0);
10297 int nloads
= const_nunits
;
10299 tree ltype
= TREE_TYPE (vectype
);
10300 tree lvectype
= vectype
;
10301 auto_vec
<tree
> dr_chain
;
10302 if (memory_access_type
== VMAT_STRIDED_SLP
)
10304 if (group_size
< const_nunits
)
10306 /* First check if vec_init optab supports construction from vector
10307 elts directly. Otherwise avoid emitting a constructor of
10308 vector elements by performing the loads using an integer type
10309 of the same size, constructing a vector of those and then
10310 re-interpreting it as the original vector type. This avoids a
10311 huge runtime penalty due to the general inability to perform
10312 store forwarding from smaller stores to a larger load. */
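	      /* Illustrative example (the types are an assumption, not a
		 requirement of any particular target): with a group_size of
		 2 and a V8SI vectype, nloads becomes 4 and each load reads
		 a 64-bit piece (a two-element sub-vector or a DImode
		 integer, depending on what the target's vec_init supports);
		 the pieces are collected into the composition vector
		 computed below and finally VIEW_CONVERTed back to V8SI,
		 avoiding an 8-element scalar constructor.  */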
10315 = vector_vector_composition_type (vectype
,
10316 const_nunits
/ group_size
,
10318 if (vtype
!= NULL_TREE
)
10320 nloads
= const_nunits
/ group_size
;
10329 lnel
= const_nunits
;
10332 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10334 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10335 else if (nloads
== 1)
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
	     variable VF.  */
10347 unsigned int const_vf
= vf
.to_constant ();
10348 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10349 dr_chain
.create (ncopies
);
10352 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10354 unsigned int group_el
= 0;
10355 unsigned HOST_WIDE_INT
10356 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10357 unsigned int n_groups
= 0;
      /* For costing some adjacent vector loads, we'd like to cost them
	 once with their total number instead of costing each one by one.  */
10360 unsigned int n_adjacent_loads
= 0;
10361 for (j
= 0; j
< ncopies
; j
++)
10363 if (nloads
> 1 && !costing_p
)
10364 vec_alloc (v
, nloads
);
10365 gimple
*new_stmt
= NULL
;
10366 for (i
= 0; i
< nloads
; i
++)
10370 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10371 avoid ICE, see PR110776. */
10372 if (VECTOR_TYPE_P (ltype
)
10373 && memory_access_type
!= VMAT_ELEMENTWISE
)
10374 n_adjacent_loads
++;
10376 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10377 stmt_info
, 0, vect_body
);
10380 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10381 group_el
* elsz
+ cst_offset
);
10382 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10383 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10384 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10385 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10387 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10388 gimple_assign_lhs (new_stmt
));
10392 || group_el
== group_size
)
		  /* When doing SLP make sure to not load elements from
		     the next vector iteration; those will not be accessed,
		     so just use the last element again.  See PR107451.  */
10398 if (!slp
|| known_lt (n_groups
, vf
))
10400 tree newoff
= copy_ssa_name (running_off
);
10402 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10403 running_off
, stride_step
);
10404 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10405 running_off
= newoff
;
10414 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10415 stmt_info
, 0, vect_body
);
10418 tree vec_inv
= build_constructor (lvectype
, v
);
10419 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10421 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10422 if (lvectype
!= vectype
)
10425 = gimple_build_assign (make_ssa_name (vectype
),
10427 build1 (VIEW_CONVERT_EXPR
,
10428 vectype
, new_temp
));
10429 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10440 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10442 slp_node
->push_vec_def (new_stmt
);
10447 *vec_stmt
= new_stmt
;
10448 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10458 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10459 true, &n_perms
, &n_loads
);
10460 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10461 first_stmt_info
, 0, vect_body
);
10464 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10470 if (n_adjacent_loads
> 0)
10471 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10472 alignment_support_scheme
, misalignment
, false,
10473 &inside_cost
, nullptr, cost_vec
, cost_vec
,
10475 if (dump_enabled_p ())
10476 dump_printf_loc (MSG_NOTE
, vect_location
,
10477 "vect_model_load_cost: inside_cost = %u, "
10478 "prologue_cost = 0 .\n",
10485 if (memory_access_type
== VMAT_GATHER_SCATTER
10486 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10487 grouped_load
= false;
10490 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10494 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10495 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10499 first_stmt_info
= stmt_info
;
10502 /* For SLP vectorization we directly vectorize a subchain
10503 without permutation. */
10504 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10505 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10506 /* For BB vectorization always use the first stmt to base
10507 the data ref pointer on. */
10509 first_stmt_info_for_drptr
10510 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10512 /* Check if the chain of loads is already vectorized. */
10513 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
	  /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
10524 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10527 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10530 /* VEC_NUM is the number of vect stmts to be created for this group. */
10533 grouped_load
= false;
10534 /* If an SLP permutation is from N elements to N elements,
10535 and if one vector holds a whole number of N, we can load
10536 the inputs to the permutation in the same way as an
10537 unpermuted sequence. In other cases we need to load the
10538 whole group, not only the number of vector stmts the
10539 permutation result fits in. */
10540 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10542 && (group_size
!= scalar_lanes
10543 || !multiple_p (nunits
, group_size
)))
10545 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10546 variable VF; see vect_transform_slp_perm_load. */
10547 unsigned int const_vf
= vf
.to_constant ();
10548 unsigned int const_nunits
= nunits
.to_constant ();
10549 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10550 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10554 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10556 = group_size
- scalar_lanes
;
10560 vec_num
= group_size
;
10562 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10566 first_stmt_info
= stmt_info
;
10567 first_dr_info
= dr_info
;
10568 group_size
= vec_num
= 1;
10570 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10572 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10575 gcc_assert (alignment_support_scheme
);
10576 vec_loop_masks
*loop_masks
10577 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10578 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10580 vec_loop_lens
*loop_lens
10581 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10582 ? &LOOP_VINFO_LENS (loop_vinfo
)
  /* Both vect_analyze_stmt and vect_transform_stmt reach here, but with a
     difference: we cannot enable both the lens and the masks during the
     transform, while during analysis that is allowed.
     We shouldn't go with the length-based approach if fully masked.  */
10589 if (cost_vec
== NULL
)
    /* The cost_vec is NULL during transform.  */
10591 gcc_assert ((!loop_lens
|| !loop_masks
));
10593 /* Targets with store-lane instructions must not require explicit
10594 realignment. vect_supportable_dr_alignment always returns either
10595 dr_aligned or dr_unaligned_supported for masked operations. */
10596 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
10599 || alignment_support_scheme
== dr_aligned
10600 || alignment_support_scheme
== dr_unaligned_supported
);
10602 /* In case the vectorization factor (VF) is bigger than the number
10603 of elements that we can fit in a vectype (nunits), we have to generate
10604 more than one vector stmt - i.e - we need to "unroll" the
10605 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10606 from one copy of the vector stmt to the next, in the field
10607 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10608 stages to find the correct vector defs to be used when vectorizing
10609 stmts that use the defs of the current stmt. The example below
10610 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10611 need to create 4 vectorized stmts):
10613 before vectorization:
10614 RELATED_STMT VEC_STMT
10618 step 1: vectorize stmt S1:
10619 We first create the vector stmt VS1_0, and, as usual, record a
10620 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10621 Next, we create the vector stmt VS1_1, and record a pointer to
10622 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10623 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10624 stmts and pointers:
10625 RELATED_STMT VEC_STMT
10626 VS1_0: vx0 = memref0 VS1_1 -
10627 VS1_1: vx1 = memref1 VS1_2 -
10628 VS1_2: vx2 = memref2 VS1_3 -
10629 VS1_3: vx3 = memref3 - -
10630 S1: x = load - VS1_0
10634 /* In case of interleaving (non-unit grouped access):
10641 Vectorized loads are created in the order of memory accesses
10642 starting from the access of the first stmt of the chain:
10645 VS2: vx1 = &base + vec_size*1
10646 VS3: vx3 = &base + vec_size*2
10647 VS4: vx4 = &base + vec_size*3
10649 Then permutation statements are generated:
10651 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10652 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10655 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10656 (the order of the data-refs in the output of vect_permute_load_chain
10657 corresponds to the order of scalar stmts in the interleaving chain - see
10658 the documentation of vect_permute_load_chain()).
10659 The generation of permutation stmts and recording them in
10660 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10662 In case of both multiple types and interleaving, the vector loads and
10663 permutation stmts above are created for every copy. The result vector
10664 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10665 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10667 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10668 on a target that supports unaligned accesses (dr_unaligned_supported)
10669 we generate the following code:
10673 p = p + indx * vectype_size;
10678 Otherwise, the data reference is potentially unaligned on a target that
10679 does not support unaligned accesses (dr_explicit_realign_optimized) -
10680 then generate the following code, in which the data in each iteration is
10681 obtained by two vector loads, one from the previous iteration, and one
10682 from the current iteration:
10684 msq_init = *(floor(p1))
10685 p2 = initial_addr + VS - 1;
10686 realignment_token = call target_builtin;
10689 p2 = p2 + indx * vectype_size
10691 vec_dest = realign_load (msq, lsq, realignment_token)
10696 /* If the misalignment remains the same throughout the execution of the
10697 loop, we can create the init_addr and permutation mask at the loop
10698 preheader. Otherwise, it needs to be created inside the loop.
10699 This can only occur when vectorizing memory accesses in the inner-loop
10700 nested within an outer-loop that is being vectorized. */
10702 if (nested_in_vect_loop
10703 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10704 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10706 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10707 compute_in_loop
= true;
10710 bool diff_first_stmt_info
10711 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10713 tree offset
= NULL_TREE
;
10714 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10715 || alignment_support_scheme
== dr_explicit_realign
)
10716 && !compute_in_loop
)
      /* If we have a different first_stmt_info, we can't set up realignment
	 here, since we can't guarantee that first_stmt_info's DR has been
	 initialized yet; use first_stmt_info_for_drptr's DR by bumping the
	 distance from first_stmt_info's DR instead, as below.  */
10724 if (!diff_first_stmt_info
)
10725 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10726 &realignment_token
,
10727 alignment_support_scheme
, NULL_TREE
,
10729 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10731 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10732 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10734 gcc_assert (!first_stmt_info_for_drptr
);
10741 if (!known_eq (poffset
, 0))
10743 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10744 : size_int (poffset
));
10747 tree vec_offset
= NULL_TREE
;
10748 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10750 aggr_type
= NULL_TREE
;
10753 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10755 aggr_type
= elem_type
;
10757 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10758 &bump
, &vec_offset
, loop_lens
);
10762 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10763 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10765 aggr_type
= vectype
;
10766 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10767 memory_access_type
, loop_lens
);
10770 auto_vec
<tree
> vec_offsets
;
10771 auto_vec
<tree
> vec_masks
;
10772 if (mask
&& !costing_p
)
10775 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10778 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10779 &vec_masks
, mask_vectype
);
10782 tree vec_mask
= NULL_TREE
;
10783 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10785 gcc_assert (alignment_support_scheme
== dr_aligned
10786 || alignment_support_scheme
== dr_unaligned_supported
);
10787 gcc_assert (grouped_load
&& !slp
);
10789 unsigned int inside_cost
= 0, prologue_cost
= 0;
      /* For costing some adjacent vector loads, we'd like to cost them
	 once with their total number instead of costing each one by one.  */
10792 unsigned int n_adjacent_loads
= 0;
10793 for (j
= 0; j
< ncopies
; j
++)
10797 /* An IFN_LOAD_LANES will load all its vector results,
10798 regardless of which ones we actually need. Account
10799 for the cost of unused results. */
10800 if (first_stmt_info
== stmt_info
)
10802 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10803 stmt_vec_info next_stmt_info
= first_stmt_info
;
10807 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10809 while (next_stmt_info
);
10812 if (dump_enabled_p ())
10813 dump_printf_loc (MSG_NOTE
, vect_location
,
10814 "vect_model_load_cost: %d "
10815 "unused vectors.\n",
10817 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10818 alignment_support_scheme
,
10819 misalignment
, false, &inside_cost
,
10820 &prologue_cost
, cost_vec
, cost_vec
,
10824 n_adjacent_loads
++;
10828 /* 1. Create the vector or array pointer update chain. */
10831 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10832 at_loop
, offset
, &dummy
, gsi
,
10833 &ptr_incr
, false, bump
);
10836 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10837 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10841 vec_mask
= vec_masks
[j
];
10843 tree vec_array
= create_vector_array (vectype
, vec_num
);
10845 tree final_mask
= NULL_TREE
;
10846 tree final_len
= NULL_TREE
;
10847 tree bias
= NULL_TREE
;
10849 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10850 ncopies
, vectype
, j
);
10852 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10855 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10858 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10859 ncopies
, vectype
, j
, 1);
10861 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10862 signed char biasval
10863 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10864 bias
= build_int_cst (intQI_type_node
, biasval
);
10867 mask_vectype
= truth_type_for (vectype
);
10868 final_mask
= build_minus_one_cst (mask_vectype
);
10873 if (final_len
&& final_mask
)
10876 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10877 VEC_MASK, LEN, BIAS). */
10878 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10879 tree alias_ptr
= build_int_cst (ref_type
, align
);
10880 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10881 dataref_ptr
, alias_ptr
,
10882 final_mask
, final_len
, bias
);
10884 else if (final_mask
)
10887 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10889 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10890 tree alias_ptr
= build_int_cst (ref_type
, align
);
10891 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10892 dataref_ptr
, alias_ptr
,
10898 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10899 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10900 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10902 gimple_call_set_lhs (call
, vec_array
);
10903 gimple_call_set_nothrow (call
, true);
10904 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10906 dr_chain
.create (vec_num
);
10907 /* Extract each vector into an SSA_NAME. */
10908 for (i
= 0; i
< vec_num
; i
++)
10910 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10912 dr_chain
.quick_push (new_temp
);
10915 /* Record the mapping between SSA_NAMEs and statements. */
10916 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10918 /* Record that VEC_ARRAY is now dead. */
10919 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10921 dr_chain
.release ();
10923 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10928 if (n_adjacent_loads
> 0)
10929 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10930 alignment_support_scheme
, misalignment
, false,
10931 &inside_cost
, &prologue_cost
, cost_vec
,
10933 if (dump_enabled_p ())
10934 dump_printf_loc (MSG_NOTE
, vect_location
,
10935 "vect_model_load_cost: inside_cost = %u, "
10936 "prologue_cost = %u .\n",
10937 inside_cost
, prologue_cost
);
10943 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10945 gcc_assert (alignment_support_scheme
== dr_aligned
10946 || alignment_support_scheme
== dr_unaligned_supported
);
10947 gcc_assert (!grouped_load
&& !slp_perm
);
10949 unsigned int inside_cost
= 0, prologue_cost
= 0;
10950 for (j
= 0; j
< ncopies
; j
++)
10952 /* 1. Create the vector or array pointer update chain. */
10953 if (j
== 0 && !costing_p
)
10955 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10956 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10957 slp_node
, &gs_info
, &dataref_ptr
,
10961 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10962 at_loop
, offset
, &dummy
, gsi
,
10963 &ptr_incr
, false, bump
);
10965 else if (!costing_p
)
10967 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10968 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10969 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10970 gsi
, stmt_info
, bump
);
10973 gimple
*new_stmt
= NULL
;
10974 for (i
= 0; i
< vec_num
; i
++)
10976 tree final_mask
= NULL_TREE
;
10977 tree final_len
= NULL_TREE
;
10978 tree bias
= NULL_TREE
;
10982 vec_mask
= vec_masks
[vec_num
* j
+ i
];
10985 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10986 vec_num
* ncopies
, vectype
,
10989 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10990 final_mask
, vec_mask
, gsi
);
10992 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10993 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10994 gsi
, stmt_info
, bump
);
10997 /* 2. Create the vector-load in the loop. */
10998 unsigned HOST_WIDE_INT align
;
10999 if (gs_info
.ifn
!= IFN_LAST
)
11003 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11005 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11006 stmt_info
, 0, vect_body
);
11009 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
11010 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
11011 tree zero
= build_zero_cst (vectype
);
11012 tree scale
= size_int (gs_info
.scale
);
11014 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
11018 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11019 vec_num
* ncopies
, vectype
,
11020 vec_num
* j
+ i
, 1);
11023 = build_int_cst (sizetype
,
11024 TYPE_VECTOR_SUBPARTS (vectype
));
11025 signed char biasval
11026 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11027 bias
= build_int_cst (intQI_type_node
, biasval
);
11030 mask_vectype
= truth_type_for (vectype
);
11031 final_mask
= build_minus_one_cst (mask_vectype
);
11036 if (final_len
&& final_mask
)
11038 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
11039 dataref_ptr
, vec_offset
,
11040 scale
, zero
, final_mask
,
11042 else if (final_mask
)
11043 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
11044 dataref_ptr
, vec_offset
,
11045 scale
, zero
, final_mask
);
11047 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
11048 dataref_ptr
, vec_offset
,
11050 gimple_call_set_nothrow (call
, true);
11052 data_ref
= NULL_TREE
;
11054 else if (gs_info
.decl
)
11056 /* The builtin decls path for gather is legacy, x86 only. */
11057 gcc_assert (!final_len
&& nunits
.is_constant ());
11060 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11062 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11063 stmt_info
, 0, vect_body
);
11066 poly_uint64 offset_nunits
11067 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
11068 if (known_eq (nunits
, offset_nunits
))
11070 new_stmt
= vect_build_one_gather_load_call
11071 (vinfo
, stmt_info
, gsi
, &gs_info
,
11072 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
11074 data_ref
= NULL_TREE
;
11076 else if (known_eq (nunits
, offset_nunits
* 2))
		      /* We have an offset vector with half the number of
			 lanes but the builtins will produce full vectype
			 data with just the lower lanes filled.  */
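		      /* Illustrative example (assumed types): V8SF data
			 gathered with a V4DI offset vector.  Two builtin
			 calls are emitted, each filling only the low four
			 lanes of a V8SF result, and the VEC_PERM_EXPR built
			 below glues the two low halves into the final
			 vector.  */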
11081 new_stmt
= vect_build_one_gather_load_call
11082 (vinfo
, stmt_info
, gsi
, &gs_info
,
11083 dataref_ptr
, vec_offsets
[2 * vec_num
* j
+ 2 * i
],
11085 tree low
= make_ssa_name (vectype
);
11086 gimple_set_lhs (new_stmt
, low
);
11087 vect_finish_stmt_generation (vinfo
, stmt_info
,
		      /* Now put the upper half of final_mask into
			 its lower half.  */
11092 && !SCALAR_INT_MODE_P
11093 (TYPE_MODE (TREE_TYPE (final_mask
))))
11095 int count
= nunits
.to_constant ();
11096 vec_perm_builder
sel (count
, count
, 1);
11097 sel
.quick_grow (count
);
11098 for (int i
= 0; i
< count
; ++i
)
11099 sel
[i
] = i
| (count
/ 2);
11100 vec_perm_indices
indices (sel
, 2, count
);
11101 tree perm_mask
= vect_gen_perm_mask_checked
11102 (TREE_TYPE (final_mask
), indices
);
11103 new_stmt
= gimple_build_assign (NULL_TREE
,
11108 final_mask
= make_ssa_name (TREE_TYPE (final_mask
));
11109 gimple_set_lhs (new_stmt
, final_mask
);
11110 vect_finish_stmt_generation (vinfo
, stmt_info
,
11113 else if (final_mask
)
11115 new_stmt
= gimple_build_assign (NULL_TREE
,
11116 VEC_UNPACK_HI_EXPR
,
11118 final_mask
= make_ssa_name
11119 (truth_type_for (gs_info
.offset_vectype
));
11120 gimple_set_lhs (new_stmt
, final_mask
);
11121 vect_finish_stmt_generation (vinfo
, stmt_info
,
11125 new_stmt
= vect_build_one_gather_load_call
11126 (vinfo
, stmt_info
, gsi
, &gs_info
,
11128 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
11130 tree high
= make_ssa_name (vectype
);
11131 gimple_set_lhs (new_stmt
, high
);
11132 vect_finish_stmt_generation (vinfo
, stmt_info
,
		      /* Compose low + high.  */
11136 int count
= nunits
.to_constant ();
11137 vec_perm_builder
sel (count
, count
, 1);
11138 sel
.quick_grow (count
);
11139 for (int i
= 0; i
< count
; ++i
)
11140 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
11141 vec_perm_indices
indices (sel
, 2, count
);
11143 = vect_gen_perm_mask_checked (vectype
, indices
);
11144 new_stmt
= gimple_build_assign (NULL_TREE
,
11146 low
, high
, perm_mask
);
11147 data_ref
= NULL_TREE
;
11149 else if (known_eq (nunits
* 2, offset_nunits
))
		      /* We have an offset vector with double the number of
			 lanes.  Select the low/high part accordingly.  */
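		      /* Illustrative example (assumed types): V4DF data
			 gathered with a V8SI offset vector.  Each data
			 vector consumes only half of an offset vector, so
			 two consecutive data vectors share one; for the odd
			 ones the VEC_PERM_EXPR built below first moves the
			 offset vector's upper half into its low lanes.  */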
11153 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
11154 if ((vec_num
* j
+ i
) & 1)
11156 int count
= offset_nunits
.to_constant ();
11157 vec_perm_builder
sel (count
, count
, 1);
11158 sel
.quick_grow (count
);
11159 for (int i
= 0; i
< count
; ++i
)
11160 sel
[i
] = i
| (count
/ 2);
11161 vec_perm_indices
indices (sel
, 2, count
);
11162 tree perm_mask
= vect_gen_perm_mask_checked
11163 (TREE_TYPE (vec_offset
), indices
);
11164 new_stmt
= gimple_build_assign (NULL_TREE
,
11169 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
11170 gimple_set_lhs (new_stmt
, vec_offset
);
11171 vect_finish_stmt_generation (vinfo
, stmt_info
,
11174 new_stmt
= vect_build_one_gather_load_call
11175 (vinfo
, stmt_info
, gsi
, &gs_info
,
11176 dataref_ptr
, vec_offset
, final_mask
);
11177 data_ref
= NULL_TREE
;
11180 gcc_unreachable ();
11184 /* Emulated gather-scatter. */
11185 gcc_assert (!final_mask
);
11186 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
		  /* For emulated gathers, cost N offset vector element
		     extracts (the offset add is consumed by the load).  */
11191 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
11192 vec_to_scalar
, stmt_info
,
		  /* N scalar loads plus gathering them into a vector.  */
11197 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
11198 stmt_info
, 0, vect_body
);
11200 = record_stmt_cost (cost_vec
, 1, vec_construct
,
11201 stmt_info
, 0, vect_body
);
11204 unsigned HOST_WIDE_INT const_offset_nunits
11205 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
11207 vec
<constructor_elt
, va_gc
> *ctor_elts
;
11208 vec_alloc (ctor_elts
, const_nunits
);
11209 gimple_seq stmts
= NULL
;
11210 /* We support offset vectors with more elements
11211 than the data vector for now. */
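		  /* A sketch, for illustration, of the scalar sequence
		     emitted below for each element K of the result (names
		     are the locals used in this block):

		       idx = BIT_FIELD_REF <vec_offset, TYPE_SIZE (idx_type),
					    (K + elt_offset)
					    * TYPE_SIZE (idx_type)>;
		       idx = (sizetype) idx * scale;
		       ptr = dataref_ptr + idx;
		       elt = *(ltype *) ptr;

		     The ELTs are then collected into a CONSTRUCTOR of
		     VECTYPE.  */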
11212 unsigned HOST_WIDE_INT factor
11213 = const_offset_nunits
/ const_nunits
;
11214 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
11215 unsigned elt_offset
11216 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
11217 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
11218 tree scale
= size_int (gs_info
.scale
);
11219 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
11220 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
11221 for (unsigned k
= 0; k
< const_nunits
; ++k
)
11223 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
11224 bitsize_int (k
+ elt_offset
));
11226 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
11227 vec_offset
, TYPE_SIZE (idx_type
), boff
);
11228 idx
= gimple_convert (&stmts
, sizetype
, idx
);
11229 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
11231 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
11232 TREE_TYPE (dataref_ptr
),
11234 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
11235 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
11236 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
11237 build_int_cst (ref_type
, 0));
11238 new_stmt
= gimple_build_assign (elt
, ref
);
11239 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
11240 gimple_seq_add_stmt (&stmts
, new_stmt
);
11241 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
11243 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
11244 new_stmt
= gimple_build_assign (
11245 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
11246 data_ref
= NULL_TREE
;
11249 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11250 /* DATA_REF is null if we've already built the statement. */
11253 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11254 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11256 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11257 gimple_set_lhs (new_stmt
, new_temp
);
11258 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11260 /* Store vector loads in the corresponding SLP_NODE. */
11262 slp_node
->push_vec_def (new_stmt
);
11265 if (!slp
&& !costing_p
)
11266 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11269 if (!slp
&& !costing_p
)
11270 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11272 if (costing_p
&& dump_enabled_p ())
11273 dump_printf_loc (MSG_NOTE
, vect_location
,
11274 "vect_model_load_cost: inside_cost = %u, "
11275 "prologue_cost = %u .\n",
11276 inside_cost
, prologue_cost
);
11280 poly_uint64 group_elt
= 0;
11281 unsigned int inside_cost
= 0, prologue_cost
= 0;
  /* For costing some adjacent vector loads, we'd like to cost them
     once with their total number instead of costing each one by one.  */
11284 unsigned int n_adjacent_loads
= 0;
11285 for (j
= 0; j
< ncopies
; j
++)
11287 /* 1. Create the vector or array pointer update chain. */
11288 if (j
== 0 && !costing_p
)
11290 bool simd_lane_access_p
11291 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
11292 if (simd_lane_access_p
11293 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
11294 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
11295 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
11296 && integer_zerop (DR_INIT (first_dr_info
->dr
))
11297 && alias_sets_conflict_p (get_alias_set (aggr_type
),
11298 get_alias_set (TREE_TYPE (ref_type
)))
11299 && (alignment_support_scheme
== dr_aligned
11300 || alignment_support_scheme
== dr_unaligned_supported
))
11302 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
11303 dataref_offset
= build_int_cst (ref_type
, 0);
11305 else if (diff_first_stmt_info
)
11308 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
11309 aggr_type
, at_loop
, offset
, &dummy
,
11310 gsi
, &ptr_incr
, simd_lane_access_p
,
11312 /* Adjust the pointer by the difference to first_stmt. */
11313 data_reference_p ptrdr
11314 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
11316 = fold_convert (sizetype
,
11317 size_binop (MINUS_EXPR
,
11318 DR_INIT (first_dr_info
->dr
),
11320 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11322 if (alignment_support_scheme
== dr_explicit_realign
)
11324 msq
= vect_setup_realignment (vinfo
,
11325 first_stmt_info_for_drptr
, gsi
,
11326 &realignment_token
,
11327 alignment_support_scheme
,
11328 dataref_ptr
, &at_loop
);
11329 gcc_assert (!compute_in_loop
);
11334 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
11336 offset
, &dummy
, gsi
, &ptr_incr
,
11337 simd_lane_access_p
, bump
);
11339 else if (!costing_p
)
11341 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
11342 if (dataref_offset
)
11343 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
11346 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11350 if (grouped_load
|| slp_perm
)
11351 dr_chain
.create (vec_num
);
11353 gimple
*new_stmt
= NULL
;
11354 for (i
= 0; i
< vec_num
; i
++)
11356 tree final_mask
= NULL_TREE
;
11357 tree final_len
= NULL_TREE
;
11358 tree bias
= NULL_TREE
;
11362 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11364 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11365 vec_num
* ncopies
, vectype
,
11368 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11369 final_mask
, vec_mask
, gsi
);
11372 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11373 gsi
, stmt_info
, bump
);
11376 /* 2. Create the vector-load in the loop. */
11377 switch (alignment_support_scheme
)
11380 case dr_unaligned_supported
:
11385 unsigned int misalign
;
11386 unsigned HOST_WIDE_INT align
;
11387 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
11388 if (alignment_support_scheme
== dr_aligned
)
11390 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
11393 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
11397 misalign
= misalignment
;
11398 if (dataref_offset
== NULL_TREE
11399 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
11400 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
11402 align
= least_bit_hwi (misalign
| align
);
11404 /* Compute IFN when LOOP_LENS or final_mask valid. */
11405 machine_mode vmode
= TYPE_MODE (vectype
);
11406 machine_mode new_vmode
= vmode
;
11407 internal_fn partial_ifn
= IFN_LAST
;
11410 opt_machine_mode new_ovmode
11411 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
11412 new_vmode
= new_ovmode
.require ();
11414 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
11415 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11416 vec_num
* ncopies
, vectype
,
11417 vec_num
* j
+ i
, factor
);
11419 else if (final_mask
)
11421 if (!can_vec_mask_load_store_p (
11422 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
11424 gcc_unreachable ();
11427 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11431 /* Pass VF value to 'len' argument of
11432 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11433 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11437 /* Pass all ones value to 'mask' argument of
11438 MASK_LEN_LOAD if final_mask is invalid. */
11439 mask_vectype
= truth_type_for (vectype
);
11440 final_mask
= build_minus_one_cst (mask_vectype
);
11445 signed char biasval
11446 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11448 bias
= build_int_cst (intQI_type_node
, biasval
);
11453 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11455 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11456 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
11458 final_mask
, final_len
,
11461 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
11464 gimple_call_set_nothrow (call
, true);
11466 data_ref
= NULL_TREE
;
11468 /* Need conversion if it's wrapped with VnQI. */
11469 if (vmode
!= new_vmode
)
11471 tree new_vtype
= build_vector_type_for_mode (
11472 unsigned_intQI_type_node
, new_vmode
);
11474 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11475 gimple_set_lhs (call
, var
);
11476 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11478 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11479 new_stmt
= gimple_build_assign (vec_dest
,
11480 VIEW_CONVERT_EXPR
, op
);
11483 else if (final_mask
)
11485 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11486 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11489 gimple_call_set_nothrow (call
, true);
11491 data_ref
= NULL_TREE
;
11495 tree ltype
= vectype
;
11496 tree new_vtype
= NULL_TREE
;
11497 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11498 unsigned int vect_align
11499 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11500 unsigned int scalar_dr_size
11501 = vect_get_scalar_dr_size (first_dr_info
);
11502 /* If there's no peeling for gaps but we have a gap
11503 with slp loads then load the lower half of the
11504 vector only. See get_group_load_store_type for
11505 when we apply this optimization. */
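	      /* Illustrative example: group_size = 4 with gap = 2 and a
		 four-element vectype.  Only the first two elements of each
		 group exist in memory, so LTYPE becomes a half-size type
		 (via vector_vector_composition_type below), the load reads
		 just that half, and the result is widened back to VECTYPE
		 with the remaining lanes zero-filled.  */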
11508 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && gap
!= 0
11509 && known_eq (nunits
, (group_size
- gap
) * 2)
11510 && known_eq (nunits
, group_size
)
11511 && gap
>= (vect_align
/ scalar_dr_size
))
11515 = vector_vector_composition_type (vectype
, 2,
11517 if (new_vtype
!= NULL_TREE
)
11518 ltype
= half_vtype
;
		  /* Try to use a single smaller load when we are about
		     to load excess elements compared to the unrolled
		     scalar loop.
		     ??? This should cover the above case as well.  */
11524 else if (known_gt ((vec_num
* j
+ i
+ 1) * nunits
,
11525 (group_size
* vf
- gap
)))
11527 if (known_ge ((vec_num
* j
+ i
+ 1) * nunits
11528 - (group_size
* vf
- gap
), nunits
))
11529 /* DR will be unused. */
11531 else if (known_ge (vect_align
,
11532 tree_to_poly_uint64
11533 (TYPE_SIZE_UNIT (vectype
))))
		    /* Aligned access to excess elements is OK if
		       at least one element is accessed in the
		       scalar loop.  */
11541 = ((group_size
* vf
- gap
)
11542 - (vec_num
* j
+ i
) * nunits
);
11543 /* remain should now be > 0 and < nunits. */
11545 if (constant_multiple_p (nunits
, remain
, &num
))
11549 = vector_vector_composition_type (vectype
,
11555 /* Else use multiple loads or a masked load? */
11559 = (dataref_offset
? dataref_offset
11560 : build_int_cst (ref_type
, 0));
11563 else if (ltype
!= vectype
11564 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11566 poly_uint64 gap_offset
11567 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype
))
11568 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype
)));
11569 tree gapcst
= build_int_cstu (ref_type
, gap_offset
);
11570 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11575 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11576 if (alignment_support_scheme
== dr_aligned
)
11579 TREE_TYPE (data_ref
)
11580 = build_aligned_type (TREE_TYPE (data_ref
),
11581 align
* BITS_PER_UNIT
);
11584 data_ref
= build_constructor (vectype
, NULL
);
11585 else if (ltype
!= vectype
)
11587 vect_copy_ref_info (data_ref
,
11588 DR_REF (first_dr_info
->dr
));
11589 tree tem
= make_ssa_name (ltype
);
11590 new_stmt
= gimple_build_assign (tem
, data_ref
);
11591 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11594 vec
<constructor_elt
, va_gc
> *v
;
		      /* We've computed 'num' above either statically as two
			 or via constant_multiple_p.  */
11598 = (exact_div (tree_to_poly_uint64
11599 (TYPE_SIZE_UNIT (vectype
)),
11600 tree_to_poly_uint64
11601 (TYPE_SIZE_UNIT (ltype
)))
11603 vec_alloc (v
, num
);
11604 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11607 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11608 build_zero_cst (ltype
));
11609 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11613 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11615 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11616 build_zero_cst (ltype
));
11618 gcc_assert (new_vtype
!= NULL_TREE
);
11619 if (new_vtype
== vectype
)
11620 new_stmt
= gimple_build_assign (
11621 vec_dest
, build_constructor (vectype
, v
));
11624 tree new_vname
= make_ssa_name (new_vtype
);
11625 new_stmt
= gimple_build_assign (
11626 new_vname
, build_constructor (new_vtype
, v
));
11627 vect_finish_stmt_generation (vinfo
, stmt_info
,
11629 new_stmt
= gimple_build_assign (
11631 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
	case dr_explicit_realign:
	    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
	    if (compute_in_loop)
	      msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
					    &realignment_token,
					    dr_explicit_realign,
					    dataref_ptr, NULL);
	    if (TREE_CODE (dataref_ptr) == SSA_NAME)
	      ptr = copy_ssa_name (dataref_ptr);
	      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11655 // For explicit realign the target alignment should be
11656 // known at compile time.
	    unsigned HOST_WIDE_INT align
	      = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
	    new_stmt = gimple_build_assign (
	      ptr, BIT_AND_EXPR, dataref_ptr,
	      build_int_cst (TREE_TYPE (dataref_ptr),
			     -(HOST_WIDE_INT) align));
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
	    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
	    vec_dest = vect_create_destination_var (scalar_dest, vectype);
	    new_stmt = gimple_build_assign (vec_dest, data_ref);
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	    gimple_assign_set_lhs (new_stmt, new_temp);
	    gimple_move_vops (new_stmt, stmt_info->stmt);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
	    bump = size_binop (MINUS_EXPR, bump, size_one_node);
	    ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
	    new_stmt = gimple_build_assign (
	      NULL_TREE, BIT_AND_EXPR, ptr,
	      build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
	    if (TREE_CODE (ptr) == SSA_NAME)
	      ptr = copy_ssa_name (ptr, new_stmt);
	      ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
	    gimple_assign_set_lhs (new_stmt, ptr);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
	case dr_explicit_realign_optimized:
	    if (TREE_CODE (dataref_ptr) == SSA_NAME)
	      new_temp = copy_ssa_name (dataref_ptr);
	      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
	    // We should only be doing this if we know the target
	    // alignment at compile time.
	    unsigned HOST_WIDE_INT align
	      = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
	    new_stmt = gimple_build_assign (
	      new_temp, BIT_AND_EXPR, dataref_ptr,
	      build_int_cst (TREE_TYPE (dataref_ptr),
			     -(HOST_WIDE_INT) align));
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    data_ref = build2 (MEM_REF, vectype, new_temp,
			       build_int_cst (ref_type, 0));
	  gcc_unreachable ();
	  /* One common place to cost the above vect load for different
	     alignment support schemes.  */
	    /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only need
	       to take care of the first stmt, whose stmt_info is
	       first_stmt_info; iterating vec_num times on it covers the cost
	       for the remaining stmts, consistent with the transform.  For
	       the realign prologue cost we only need to count it once for
	       the whole group.  */
	  bool first_stmt_info_p = first_stmt_info == stmt_info;
	  bool add_realign_cost = first_stmt_info_p && i == 0;
	  if (memory_access_type == VMAT_CONTIGUOUS
	      || memory_access_type == VMAT_CONTIGUOUS_REVERSE
	      || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
		  && (!grouped_load || first_stmt_info_p)))
	      /* Leave realign cases alone to keep them simple.  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		vect_get_load_cost (vinfo, stmt_info, 1,
				    alignment_support_scheme, misalignment,
				    add_realign_cost, &inside_cost,
				    &prologue_cost, cost_vec, cost_vec,
		n_adjacent_loads++;
	vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  /* DATA_REF is null if we've already built the statement.  */
	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
	      new_stmt = gimple_build_assign (vec_dest, data_ref);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  /* 3. Handle explicit realignment if necessary/supported.
	     vec_dest = realign_load (msq, lsq, realignment_token)  */
	      && (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign))
	      lsq = gimple_assign_lhs (new_stmt);
	      if (!realignment_token)
		realignment_token = dataref_ptr;
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
					      lsq, realignment_token);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      if (alignment_support_scheme == dr_explicit_realign_optimized)
		  if (i == vec_num - 1 && j == ncopies - 1)
		    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
	  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
	      inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
					      stmt_info, 0, vect_body);
	      tree perm_mask = perm_mask_for_reverse (vectype);
	      new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
					       perm_mask, stmt_info, gsi);
	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
	  /* Collect vector loads and later create their permutation in
	     vect_transform_grouped_load ().  */
	  if (!costing_p && (grouped_load || slp_perm))
	    dr_chain.quick_push (new_temp);
	  /* Store vector loads in the corresponding SLP_NODE.  */
	  if (!costing_p && slp && !slp_perm)
	    slp_node->push_vec_def (new_stmt);
	  /* With an SLP permutation we load the gaps as well; without one
	     we need to skip the gaps after we manage to fully load all
	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
	  group_elt += nunits;
	      && maybe_ne (group_gap_adj, 0U)
	      && known_eq (group_elt, group_size - group_gap_adj))
	      poly_wide_int bump_val
		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
	      if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
		bump_val = -bump_val;
	      tree bump = wide_int_to_tree (sizetype, bump_val);
	      dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
      /* Bump the vector pointer to account for a gap or for excess
	 elements loaded for a permuted SLP load.  */
	  && maybe_ne (group_gap_adj, 0U)
	  poly_wide_int bump_val
	    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
	  if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
	    bump_val = -bump_val;
	  tree bump = wide_int_to_tree (sizetype, bump_val);
	  dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
      if (slp && !slp_perm)
11851 /* For SLP we know we've seen all possible uses of dr_chain so
11852 direct vect_transform_slp_perm_load to DCE the unused parts.
11853 ??? This is a hack to prevent compile-time issues as seen
11854 in PR101120 and friends. */
	  vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
					true, &n_perms, nullptr);
	  inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
					  stmt_info, 0, vect_body);
	  bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
						  gsi, vf, false, &n_perms,
      gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
      /* We assume that the cost of a single load-lanes instruction is
	 equivalent to the cost of DR_GROUP_SIZE separate loads.  If a
	 grouped access is instead being provided by a load-and-permute
	 operation, include the cost of the permutes.  */
11880 if (costing_p
&& first_stmt_info
== stmt_info
)
	  /* Uses even and odd extract operations or shuffle operations
	     for each needed permute.  */
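	  /* A rough worked example (hypothetical numbers): for
	     group_size == 4 the load-and-permute scheme needs
	     ceil_log2 (4) * 4 == 8 vec_perm operations, so eight vec_perm
	     stmt costs are recorded in addition to the loads themselves.  */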
11884 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11885 int nstmts
= ceil_log2 (group_size
) * group_size
;
11886 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11887 stmt_info
, 0, vect_body
);
11889 if (dump_enabled_p ())
11890 dump_printf_loc (MSG_NOTE
, vect_location
,
11891 "vect_model_load_cost:"
11892 "strided group_size = %d .\n",
11895 else if (!costing_p
)
11897 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11899 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11902 else if (!costing_p
)
11903 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11905 dr_chain
.release ();
11907 if (!slp
&& !costing_p
)
11908 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11912 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11913 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11914 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11915 if (n_adjacent_loads
> 0)
11916 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
11917 alignment_support_scheme
, misalignment
, false,
11918 &inside_cost
, &prologue_cost
, cost_vec
, cost_vec
,
11920 if (dump_enabled_p ())
11921 dump_printf_loc (MSG_NOTE
, vect_location
,
11922 "vect_model_load_cost: inside_cost = %u, "
11923 "prologue_cost = %u .\n",
11924 inside_cost
, prologue_cost
);
/* Function vect_is_simple_cond.

   LOOP - the loop that is being vectorized.
   COND - the condition that is checked for simple use.

   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - the def types for the arguments of the comparison.

   Returns whether COND can be vectorized.  Checks whether the
   condition operands are supportable using vect_is_simple_use.  */
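/* For illustration (hypothetical GIMPLE, not from the original sources):

     _5 = a_2 < b_3;
     x_6 = _5 ? c_4 : d_1;

   COND is then either the scalar boolean SSA name _5 or the embedded
   comparison a_2 < b_3; in both cases its operands have to be simple
   uses for the condition to be vectorizable.  */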
11944 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
11945 slp_tree slp_node
, tree
*comp_vectype
,
11946 enum vect_def_type
*dts
, tree vectype
)
11949 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11953 if (TREE_CODE (cond
) == SSA_NAME
11954 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
11956 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
11957 &slp_op
, &dts
[0], comp_vectype
)
11959 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
11964 if (!COMPARISON_CLASS_P (cond
))
11967 lhs
= TREE_OPERAND (cond
, 0);
11968 rhs
= TREE_OPERAND (cond
, 1);
11970 if (TREE_CODE (lhs
) == SSA_NAME
)
11972 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
11973 &lhs
, &slp_op
, &dts
[0], &vectype1
))
11976 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
11977 || TREE_CODE (lhs
) == FIXED_CST
)
11978 dts
[0] = vect_constant_def
;
11982 if (TREE_CODE (rhs
) == SSA_NAME
)
11984 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
11985 &rhs
, &slp_op
, &dts
[1], &vectype2
))
11988 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
11989 || TREE_CODE (rhs
) == FIXED_CST
)
11990 dts
[1] = vect_constant_def
;
11994 if (vectype1
&& vectype2
11995 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
11996 TYPE_VECTOR_SUBPARTS (vectype2
)))
11999 *comp_vectype
= vectype1
? vectype1
: vectype2
;
12000 /* Invariant comparison. */
12001 if (! *comp_vectype
)
12003 tree scalar_type
= TREE_TYPE (lhs
);
12004 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
12005 *comp_vectype
= truth_type_for (vectype
);
12008 /* If we can widen the comparison to match vectype do so. */
12009 if (INTEGRAL_TYPE_P (scalar_type
)
12011 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
12012 TYPE_SIZE (TREE_TYPE (vectype
))))
12013 scalar_type
= build_nonstandard_integer_type
12014 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
12015 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create a
   vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
   and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
12035 vectorizable_condition (vec_info
*vinfo
,
12036 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12038 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12040 tree scalar_dest
= NULL_TREE
;
12041 tree vec_dest
= NULL_TREE
;
12042 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
12043 tree then_clause
, else_clause
;
12044 tree comp_vectype
= NULL_TREE
;
12045 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
12046 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
12049 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12050 enum vect_def_type dts
[4]
12051 = {vect_unknown_def_type
, vect_unknown_def_type
,
12052 vect_unknown_def_type
, vect_unknown_def_type
};
12056 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12058 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12059 vec
<tree
> vec_oprnds0
= vNULL
;
12060 vec
<tree
> vec_oprnds1
= vNULL
;
12061 vec
<tree
> vec_oprnds2
= vNULL
;
12062 vec
<tree
> vec_oprnds3
= vNULL
;
12064 bool masked
= false;
12066 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12069 /* Is vectorizable conditional operation? */
12070 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12074 code
= gimple_assign_rhs_code (stmt
);
12075 if (code
!= COND_EXPR
)
12078 stmt_vec_info reduc_info
= NULL
;
12079 int reduc_index
= -1;
12080 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
12082 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
12087 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
12088 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
12089 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
12090 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
12091 || reduc_index
!= -1);
12095 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12099 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12100 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12105 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
12109 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12113 gcc_assert (ncopies
>= 1);
12114 if (for_reduction
&& ncopies
> 1)
12115 return false; /* FORNOW */
12117 cond_expr
= gimple_assign_rhs1 (stmt
);
12119 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
12120 &comp_vectype
, &dts
[0], vectype
)
12124 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
12125 slp_tree then_slp_node
, else_slp_node
;
12126 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
12127 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
12129 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
12130 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
12133 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
12136 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
12139 masked
= !COMPARISON_CLASS_P (cond_expr
);
12140 vec_cmp_type
= truth_type_for (comp_vectype
);
12142 if (vec_cmp_type
== NULL_TREE
)
12145 cond_code
= TREE_CODE (cond_expr
);
12148 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
12149 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
12152 /* For conditional reductions, the "then" value needs to be the candidate
12153 value calculated by this iteration while the "else" value needs to be
12154 the result carried over from previous iterations. If the COND_EXPR
12155 is the other way around, we need to swap it. */
12156 bool must_invert_cmp_result
= false;
12157 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
12160 must_invert_cmp_result
= true;
12163 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
12164 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
12165 if (new_code
== ERROR_MARK
)
12166 must_invert_cmp_result
= true;
12169 cond_code
= new_code
;
12170 /* Make sure we don't accidentally use the old condition. */
12171 cond_expr
= NULL_TREE
;
12174 std::swap (then_clause
, else_clause
);
12177 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
12179 /* Boolean values may have another representation in vectors
12180 and therefore we prefer bit operations over comparison for
12181 them (which also works for scalar masks). We store opcodes
12182 to use in bitop1 and bitop2. Statement is vectorized as
12183 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12184 depending on bitop1 and bitop2 arity. */
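      /* A sketch of the mapping used below (boolean operands a and b):
	   a >  b  is emitted as  a & ~b    (bitop1 = BIT_NOT_EXPR,
					     bitop2 = BIT_AND_EXPR)
	   a == b  is emitted as  ~(a ^ b)  (bitop1 = BIT_XOR_EXPR,
					     bitop2 = BIT_NOT_EXPR)
	 which gives the logical result without depending on how "true"
	 is represented in the vector (all-ones mask vs 1).  */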
12188 bitop1
= BIT_NOT_EXPR
;
12189 bitop2
= BIT_AND_EXPR
;
12192 bitop1
= BIT_NOT_EXPR
;
12193 bitop2
= BIT_IOR_EXPR
;
12196 bitop1
= BIT_NOT_EXPR
;
12197 bitop2
= BIT_AND_EXPR
;
12198 std::swap (cond_expr0
, cond_expr1
);
12201 bitop1
= BIT_NOT_EXPR
;
12202 bitop2
= BIT_IOR_EXPR
;
12203 std::swap (cond_expr0
, cond_expr1
);
12206 bitop1
= BIT_XOR_EXPR
;
12209 bitop1
= BIT_XOR_EXPR
;
12210 bitop2
= BIT_NOT_EXPR
;
12215 cond_code
= SSA_NAME
;
12218 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
12219 && reduction_type
== EXTRACT_LAST_REDUCTION
12220 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
12222 if (dump_enabled_p ())
12223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12224 "reduction comparison operation not supported.\n");
12230 if (bitop1
!= NOP_EXPR
)
12232 machine_mode mode
= TYPE_MODE (comp_vectype
);
12235 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
12236 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12239 if (bitop2
!= NOP_EXPR
)
12241 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
12243 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12248 vect_cost_for_stmt kind
= vector_stmt
;
12249 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12250 /* Count one reduction-like operation per vector. */
12251 kind
= vec_to_scalar
;
12252 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
12254 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
12256 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
12261 && (!vect_maybe_update_slp_op_vectype
12262 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
12264 && !vect_maybe_update_slp_op_vectype
12265 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
12266 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
12267 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
12269 if (dump_enabled_p ())
12270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12271 "incompatible vector types for invariants\n");
12275 if (loop_vinfo
&& for_reduction
12276 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12278 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12280 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12281 vectype
, OPTIMIZE_FOR_SPEED
))
12282 vect_record_loop_len (loop_vinfo
,
12283 &LOOP_VINFO_LENS (loop_vinfo
),
12284 ncopies
* vec_num
, vectype
, 1);
12286 vect_record_loop_mask (loop_vinfo
,
12287 &LOOP_VINFO_MASKS (loop_vinfo
),
12288 ncopies
* vec_num
, vectype
, NULL
);
12290 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12291 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
12293 if (dump_enabled_p ())
12294 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12295 "conditional reduction prevents the use"
12296 " of partial vectors.\n");
12297 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
12301 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
12302 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
12310 scalar_dest
= gimple_assign_lhs (stmt
);
12311 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12312 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
12314 bool swap_cond_operands
= false;
12316 /* See whether another part of the vectorized code applies a loop
12317 mask to the condition, or to its inverse. */
12319 vec_loop_masks
*masks
= NULL
;
12320 vec_loop_lens
*lens
= NULL
;
12321 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
12323 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12324 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12326 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
12328 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12329 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12332 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
12333 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12334 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12337 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
12338 tree_code orig_code
= cond
.code
;
12339 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
12340 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12342 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12343 cond_code
= cond
.code
;
12344 swap_cond_operands
= true;
12348 /* Try the inverse of the current mask. We check if the
12349 inverse mask is live and if so we generate a negate of
12350 the current mask such that we still honor NaNs. */
12351 cond
.inverted_p
= true;
12352 cond
.code
= orig_code
;
12353 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12355 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12356 cond_code
= cond
.code
;
12357 swap_cond_operands
= true;
12358 must_invert_cmp_result
= true;
12365 /* Handle cond expr. */
12367 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12368 cond_expr
, comp_vectype
, &vec_oprnds0
,
12369 then_clause
, vectype
, &vec_oprnds2
,
12370 reduction_type
!= EXTRACT_LAST_REDUCTION
12371 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12373 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12374 cond_expr0
, comp_vectype
, &vec_oprnds0
,
12375 cond_expr1
, comp_vectype
, &vec_oprnds1
,
12376 then_clause
, vectype
, &vec_oprnds2
,
12377 reduction_type
!= EXTRACT_LAST_REDUCTION
12378 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12380 /* Arguments are ready. Create the new vector stmt. */
12381 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
12383 vec_then_clause
= vec_oprnds2
[i
];
12384 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12385 vec_else_clause
= vec_oprnds3
[i
];
12387 if (swap_cond_operands
)
12388 std::swap (vec_then_clause
, vec_else_clause
);
12391 vec_compare
= vec_cond_lhs
;
12394 vec_cond_rhs
= vec_oprnds1
[i
];
12395 if (bitop1
== NOP_EXPR
)
12397 gimple_seq stmts
= NULL
;
12398 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
12399 vec_cond_lhs
, vec_cond_rhs
);
12400 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
12404 new_temp
= make_ssa_name (vec_cmp_type
);
12406 if (bitop1
== BIT_NOT_EXPR
)
12407 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
12411 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
12413 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12414 if (bitop2
== NOP_EXPR
)
12415 vec_compare
= new_temp
;
12416 else if (bitop2
== BIT_NOT_EXPR
12417 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
12419 /* Instead of doing ~x ? y : z do x ? z : y. */
12420 vec_compare
= new_temp
;
12421 std::swap (vec_then_clause
, vec_else_clause
);
12425 vec_compare
= make_ssa_name (vec_cmp_type
);
12426 if (bitop2
== BIT_NOT_EXPR
)
12428 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
12431 = gimple_build_assign (vec_compare
, bitop2
,
12432 vec_cond_lhs
, new_temp
);
12433 vect_finish_stmt_generation (vinfo
, stmt_info
,
12439 /* If we decided to apply a loop mask to the result of the vector
12440 comparison, AND the comparison with the mask now. Later passes
	 should then be able to reuse the AND results between multiple
12445 for (int i = 0; i < 100; ++i)
12446 x[i] = y[i] ? z[i] : 10;
12448 results in following optimized GIMPLE:
12450 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12451 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12452 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12453 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12454 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12455 vect_iftmp.11_47, { 10, ... }>;
	 instead of using masked and unmasked forms of
12458 vec != { 0, ... } (masked in the MASK_LOAD,
12459 unmasked in the VEC_COND_EXPR). */
12461 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12462 in cases where that's necessary. */
12464 tree len
= NULL_TREE
, bias
= NULL_TREE
;
12465 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
12467 if (!is_gimple_val (vec_compare
))
12469 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12470 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12472 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12473 vec_compare
= vec_compare_name
;
12476 if (must_invert_cmp_result
)
12478 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12479 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12482 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12483 vec_compare
= vec_compare_name
;
12486 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12487 vectype
, OPTIMIZE_FOR_SPEED
))
12491 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
12492 vec_num
* ncopies
, vectype
, i
, 1);
12493 signed char biasval
12494 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
12495 bias
= build_int_cst (intQI_type_node
, biasval
);
12499 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
12500 bias
= build_int_cst (intQI_type_node
, 0);
12506 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
12508 tree tmp2
= make_ssa_name (vec_cmp_type
);
12510 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
12512 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
12513 vec_compare
= tmp2
;
12518 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12520 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
12521 tree lhs
= gimple_get_lhs (old_stmt
);
12523 new_stmt
= gimple_build_call_internal
12524 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, else_clause
, vec_compare
,
12525 vec_then_clause
, len
, bias
);
12527 new_stmt
= gimple_build_call_internal
12528 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
12530 gimple_call_set_lhs (new_stmt
, lhs
);
12531 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
12532 if (old_stmt
== gsi_stmt (*gsi
))
12533 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
12536 /* In this case we're moving the definition to later in the
12537 block. That doesn't matter because the only uses of the
12538 lhs are in phi statements. */
12539 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12540 gsi_remove (&old_gsi
, true);
12541 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12546 new_temp
= make_ssa_name (vec_dest
);
12547 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12548 vec_then_clause
, vec_else_clause
);
12549 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12552 slp_node
->push_vec_def (new_stmt
);
12554 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12558 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12560 vec_oprnds0
.release ();
12561 vec_oprnds1
.release ();
12562 vec_oprnds2
.release ();
12563 vec_oprnds3
.release ();
/* Helper of vectorizable_comparison.

   Check if STMT_INFO is a comparison expression CODE that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
12577 vectorizable_comparison_1 (vec_info
*vinfo
, tree vectype
,
12578 stmt_vec_info stmt_info
, tree_code code
,
12579 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12580 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12582 tree lhs
, rhs1
, rhs2
;
12583 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12584 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12586 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12587 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12589 poly_uint64 nunits
;
12591 enum tree_code bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12593 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12594 vec
<tree
> vec_oprnds0
= vNULL
;
12595 vec
<tree
> vec_oprnds1
= vNULL
;
12597 tree mask
= NULL_TREE
;
12599 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12602 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12605 mask_type
= vectype
;
12606 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12611 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12613 gcc_assert (ncopies
>= 1);
12615 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12618 slp_tree slp_rhs1
, slp_rhs2
;
12619 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12620 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12623 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12624 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12627 if (vectype1
&& vectype2
12628 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12629 TYPE_VECTOR_SUBPARTS (vectype2
)))
12632 vectype
= vectype1
? vectype1
: vectype2
;
12634 /* Invariant comparison. */
12637 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12638 vectype
= mask_type
;
12640 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12642 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12645 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12648 /* Can't compare mask and non-mask types. */
12649 if (vectype1
&& vectype2
12650 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
12653 /* Boolean values may have another representation in vectors
12654 and therefore we prefer bit operations over comparison for
12655 them (which also works for scalar masks). We store opcodes
12656 to use in bitop1 and bitop2. Statement is vectorized as
12657 BITOP2 (rhs1 BITOP1 rhs2) or
12658 rhs1 BITOP2 (BITOP1 rhs2)
12659 depending on bitop1 and bitop2 arity. */
12660 bool swap_p
= false;
12661 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12663 if (code
== GT_EXPR
)
12665 bitop1
= BIT_NOT_EXPR
;
12666 bitop2
= BIT_AND_EXPR
;
12668 else if (code
== GE_EXPR
)
12670 bitop1
= BIT_NOT_EXPR
;
12671 bitop2
= BIT_IOR_EXPR
;
12673 else if (code
== LT_EXPR
)
12675 bitop1
= BIT_NOT_EXPR
;
12676 bitop2
= BIT_AND_EXPR
;
12679 else if (code
== LE_EXPR
)
12681 bitop1
= BIT_NOT_EXPR
;
12682 bitop2
= BIT_IOR_EXPR
;
12687 bitop1
= BIT_XOR_EXPR
;
12688 if (code
== EQ_EXPR
)
12689 bitop2
= BIT_NOT_EXPR
;
12695 if (bitop1
== NOP_EXPR
)
12697 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12702 machine_mode mode
= TYPE_MODE (vectype
);
12705 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12706 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12709 if (bitop2
!= NOP_EXPR
)
12711 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12712 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12717 /* Put types on constant and invariant SLP children. */
12719 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12720 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12722 if (dump_enabled_p ())
12723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12724 "incompatible vector types for invariants\n");
12728 vect_model_simple_cost (vinfo
, stmt_info
,
12729 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12730 dts
, ndts
, slp_node
, cost_vec
);
12737 lhs
= gimple_get_lhs (STMT_VINFO_STMT (stmt_info
));
12739 mask
= vect_create_destination_var (lhs
, mask_type
);
12741 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12742 rhs1
, vectype
, &vec_oprnds0
,
12743 rhs2
, vectype
, &vec_oprnds1
);
12745 std::swap (vec_oprnds0
, vec_oprnds1
);
12747 /* Arguments are ready. Create the new vector stmt. */
12748 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12751 vec_rhs2
= vec_oprnds1
[i
];
12754 new_temp
= make_ssa_name (mask
);
12756 new_temp
= make_temp_ssa_name (mask_type
, NULL
, "cmp");
12757 if (bitop1
== NOP_EXPR
)
12759 new_stmt
= gimple_build_assign (new_temp
, code
,
12760 vec_rhs1
, vec_rhs2
);
12761 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12765 if (bitop1
== BIT_NOT_EXPR
)
12766 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12768 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12770 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12771 if (bitop2
!= NOP_EXPR
)
12773 tree res
= make_ssa_name (mask
);
12774 if (bitop2
== BIT_NOT_EXPR
)
12775 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12777 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12779 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12783 slp_node
->push_vec_def (new_stmt
);
12785 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12789 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12791 vec_oprnds0
.release ();
12792 vec_oprnds1
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
12806 vectorizable_comparison (vec_info
*vinfo
,
12807 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12809 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12811 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12813 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12816 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12819 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12823 enum tree_code code
= gimple_assign_rhs_code (stmt
);
12824 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12825 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12826 vec_stmt
, slp_node
, cost_vec
))
12830 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
/* Check to see if the current early break given in STMT_INFO is valid for
   vectorization.  */
12839 vectorizable_early_exit (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12840 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12841 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12843 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12845 || !is_a
<gcond
*> (STMT_VINFO_STMT (stmt_info
)))
12848 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_condition_def
)
12851 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
12854 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12856 auto code
= gimple_cond_code (STMT_VINFO_STMT (stmt_info
));
12858 tree vectype
= NULL_TREE
;
12861 enum vect_def_type dt0
;
12862 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op0
, &slp_op0
, &dt0
,
12865 if (dump_enabled_p ())
12866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12867 "use not simple.\n");
12874 machine_mode mode
= TYPE_MODE (vectype
);
12880 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12882 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12883 bool masked_loop_p
= LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
12885 /* Now build the new conditional. Pattern gimple_conds get dropped during
12886 codegen so we must replace the original insn. */
12887 gimple
*orig_stmt
= STMT_VINFO_STMT (vect_orig_stmt (stmt_info
));
12888 gcond
*cond_stmt
= as_a
<gcond
*>(orig_stmt
);
12889 /* When vectorizing we assume that if the branch edge is taken that we're
12890 exiting the loop. This is not however always the case as the compiler will
12891 rewrite conditions to always be a comparison against 0. To do this it
12892 sometimes flips the edges. This is fine for scalar, but for vector we
12893 then have to flip the test, as we're still assuming that if you take the
12894 branch edge that we found the exit condition. i.e. we need to know whether
12895 we are generating a `forall` or an `exist` condition. */
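  /* As a sketch (illustrative only): for an "exist" style exit we test
     whether any lane matched, i.e. the generated test is effectively

       if (mask != { 0, ... })   // OR-reduce the vector masks

     while for the flipped ("forall") case we test that every lane matched:

       if (mask == { -1, ... })  // AND-reduce the vector masks

     which is what the NE_EXPR/ior_optab versus EQ_EXPR/and_optab choices
     below correspond to.  */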
12896 auto new_code
= NE_EXPR
;
12897 auto reduc_optab
= ior_optab
;
12898 auto reduc_op
= BIT_IOR_EXPR
;
12899 tree cst
= build_zero_cst (vectype
);
12900 edge exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 0);
12901 if (exit_true_edge
->flags
& EDGE_FALSE_VALUE
)
12902 exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 1);
12903 gcc_assert (exit_true_edge
->flags
& EDGE_TRUE_VALUE
);
12904 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
),
12905 exit_true_edge
->dest
))
12907 new_code
= EQ_EXPR
;
12908 reduc_optab
= and_optab
;
12909 reduc_op
= BIT_AND_EXPR
;
12910 cst
= build_minus_one_cst (vectype
);
12913 /* Analyze only. */
12916 if (direct_optab_handler (cbranch_optab
, mode
) == CODE_FOR_nothing
)
12918 if (dump_enabled_p ())
12919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12920 "can't vectorize early exit because the "
12921 "target doesn't support flag setting vector "
12927 && direct_optab_handler (reduc_optab
, mode
) == CODE_FOR_nothing
)
12929 if (dump_enabled_p ())
12930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12931 "can't vectorize early exit because the "
12932 "target does not support boolean vector %s "
12934 reduc_optab
== ior_optab
? "OR" : "AND",
12939 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12940 vec_stmt
, slp_node
, cost_vec
))
12943 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12945 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN
, vectype
,
12946 OPTIMIZE_FOR_SPEED
))
12949 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, NULL
);
12958 tree new_temp
= NULL_TREE
;
12959 gimple
*new_stmt
= NULL
;
12961 if (dump_enabled_p ())
12962 dump_printf_loc (MSG_NOTE
, vect_location
, "transform early-exit.\n");
12964 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12965 vec_stmt
, slp_node
, cost_vec
))
12966 gcc_unreachable ();
12968 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
12969 basic_block cond_bb
= gimple_bb (stmt
);
12970 gimple_stmt_iterator cond_gsi
= gsi_last_bb (cond_bb
);
12972 auto_vec
<tree
> stmts
;
12975 stmts
.safe_splice (SLP_TREE_VEC_DEFS (slp_node
));
12978 auto vec_stmts
= STMT_VINFO_VEC_STMTS (stmt_info
);
12979 stmts
.reserve_exact (vec_stmts
.length ());
12980 for (auto stmt
: vec_stmts
)
12981 stmts
.quick_push (gimple_assign_lhs (stmt
));
12984 /* Determine if we need to reduce the final value. */
12985 if (stmts
.length () > 1)
	  /* We build the reductions in a way to maintain as much parallelism
	     as possible.  */
12989 auto_vec
<tree
> workset (stmts
.length ());
12991 /* Mask the statements as we queue them up. Normally we loop over
12992 vec_num, but since we inspect the exact results of vectorization
12993 we don't need to and instead can just use the stmts themselves. */
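	  /* A sketch of the reduction shape (illustrative): with four vector
	     results v0..v3 the loop below builds

	       t0 = v3 | v2;   t1 = v1 | v0;   t2 = t0 | t1;

	     (BIT_AND_EXPR for the forall case), always popping the last two
	     entries of WORKSET and re-queueing the partial result at the
	     front, so the independent ORs can still issue in parallel.  */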
12995 for (unsigned i
= 0; i
< stmts
.length (); i
++)
12998 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
,
13001 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (stmt_mask
), stmt_mask
,
13002 stmts
[i
], &cond_gsi
);
13003 workset
.quick_push (stmt_mask
);
13006 workset
.splice (stmts
);
13008 while (workset
.length () > 1)
13010 new_temp
= make_temp_ssa_name (vectype
, NULL
, "vexit_reduc");
13011 tree arg0
= workset
.pop ();
13012 tree arg1
= workset
.pop ();
13013 new_stmt
= gimple_build_assign (new_temp
, reduc_op
, arg0
, arg1
);
13014 vect_finish_stmt_generation (loop_vinfo
, stmt_info
, new_stmt
,
13016 workset
.quick_insert (0, new_temp
);
13021 new_temp
= stmts
[0];
13025 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
, 0);
13026 new_temp
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
13027 new_temp
, &cond_gsi
);
13031 gcc_assert (new_temp
);
13033 gimple_cond_set_condition (cond_stmt
, new_code
, new_temp
, cst
);
13034 update_stmt (orig_stmt
);
13037 SLP_TREE_VEC_DEFS (slp_node
).truncate (0);
13039 STMT_VINFO_VEC_STMTS (stmt_info
).truncate (0);
13042 *vec_stmt
= orig_stmt
;
13047 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13048 can handle all live statements in the node. Otherwise return true
13049 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13050 VEC_STMT_P is as for vectorizable_live_operation. */
13053 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
13054 slp_tree slp_node
, slp_instance slp_node_instance
,
13056 stmt_vector_for_cost
*cost_vec
)
13058 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
13061 stmt_vec_info slp_stmt_info
;
13063 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
13065 if ((STMT_VINFO_LIVE_P (slp_stmt_info
)
13067 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13068 && STMT_VINFO_DEF_TYPE (slp_stmt_info
)
13069 == vect_induction_def
))
13070 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
13071 slp_node_instance
, i
,
13072 vec_stmt_p
, cost_vec
))
13076 else if ((STMT_VINFO_LIVE_P (stmt_info
)
13077 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13078 && STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
))
13079 && !vectorizable_live_operation (vinfo
, stmt_info
,
13080 slp_node
, slp_node_instance
, -1,
13081 vec_stmt_p
, cost_vec
))
13087 /* Make sure the statement is vectorizable. */
13090 vect_analyze_stmt (vec_info
*vinfo
,
13091 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
13092 slp_tree node
, slp_instance node_instance
,
13093 stmt_vector_for_cost
*cost_vec
)
13095 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
13096 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
13098 gimple_seq pattern_def_seq
;
13100 if (dump_enabled_p ())
13101 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
13104 if (gimple_has_volatile_ops (stmt_info
->stmt
))
13105 return opt_result::failure_at (stmt_info
->stmt
,
13107 " stmt has volatile operands: %G\n",
13110 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13112 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
13114 gimple_stmt_iterator si
;
13116 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
13118 stmt_vec_info pattern_def_stmt_info
13119 = vinfo
->lookup_stmt (gsi_stmt (si
));
13120 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
13121 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
13123 /* Analyze def stmt of STMT if it's a pattern stmt. */
13124 if (dump_enabled_p ())
13125 dump_printf_loc (MSG_NOTE
, vect_location
,
13126 "==> examining pattern def statement: %G",
13127 pattern_def_stmt_info
->stmt
);
13130 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
13131 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; there we don't analyze pattern stmts instead of the
     original, since the pattern stmts are already part of an SLP instance.  */
13153 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
13154 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
13155 && !STMT_VINFO_LIVE_P (stmt_info
))
13157 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13158 && pattern_stmt_info
13159 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13160 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13162 /* Analyze PATTERN_STMT instead of the original stmt. */
13163 stmt_info
= pattern_stmt_info
;
13164 if (dump_enabled_p ())
13165 dump_printf_loc (MSG_NOTE
, vect_location
,
13166 "==> examining pattern statement: %G",
13171 if (dump_enabled_p ())
13172 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
13174 return opt_result::success ();
13177 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13179 && pattern_stmt_info
13180 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13181 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13183 /* Analyze PATTERN_STMT too. */
13184 if (dump_enabled_p ())
13185 dump_printf_loc (MSG_NOTE
, vect_location
,
13186 "==> examining pattern statement: %G",
13187 pattern_stmt_info
->stmt
);
13190 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
13191 node_instance
, cost_vec
);
13196 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
13198 case vect_internal_def
:
13199 case vect_condition_def
:
13202 case vect_reduction_def
:
13203 case vect_nested_cycle
:
13204 gcc_assert (!bb_vinfo
13205 && (relevance
== vect_used_in_outer
13206 || relevance
== vect_used_in_outer_by_reduction
13207 || relevance
== vect_used_by_reduction
13208 || relevance
== vect_unused_in_scope
13209 || relevance
== vect_used_only_live
));
13212 case vect_induction_def
:
13213 case vect_first_order_recurrence
:
13214 gcc_assert (!bb_vinfo
);
13217 case vect_constant_def
:
13218 case vect_external_def
:
13219 case vect_unknown_def_type
:
13221 gcc_unreachable ();
13224 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13226 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
13228 if (STMT_VINFO_RELEVANT_P (stmt_info
))
13230 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
13231 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
13232 || gimple_code (stmt_info
->stmt
) == GIMPLE_COND
13233 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
13234 *need_to_vectorize
= true;
13237 if (PURE_SLP_STMT (stmt_info
) && !node
)
13239 if (dump_enabled_p ())
13240 dump_printf_loc (MSG_NOTE
, vect_location
,
13241 "handled only by SLP analysis\n");
13242 return opt_result::success ();
13247 && (STMT_VINFO_RELEVANT_P (stmt_info
)
13248 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
13249 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13250 -mveclibabi= takes preference over library functions with
13251 the simd attribute. */
13252 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13253 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
13255 || vectorizable_conversion (vinfo
, stmt_info
,
13256 NULL
, NULL
, node
, cost_vec
)
13257 || vectorizable_operation (vinfo
, stmt_info
,
13258 NULL
, NULL
, node
, cost_vec
)
13259 || vectorizable_assignment (vinfo
, stmt_info
,
13260 NULL
, NULL
, node
, cost_vec
)
13261 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13262 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13263 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13264 node
, node_instance
, cost_vec
)
13265 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13266 NULL
, node
, cost_vec
)
13267 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13268 || vectorizable_condition (vinfo
, stmt_info
,
13269 NULL
, NULL
, node
, cost_vec
)
13270 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13272 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13273 stmt_info
, NULL
, node
)
13274 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13275 stmt_info
, NULL
, node
, cost_vec
)
13276 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13281 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13282 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
13283 NULL
, NULL
, node
, cost_vec
)
13284 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
13286 || vectorizable_shift (vinfo
, stmt_info
,
13287 NULL
, NULL
, node
, cost_vec
)
13288 || vectorizable_operation (vinfo
, stmt_info
,
13289 NULL
, NULL
, node
, cost_vec
)
13290 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
13292 || vectorizable_load (vinfo
, stmt_info
,
13293 NULL
, NULL
, node
, cost_vec
)
13294 || vectorizable_store (vinfo
, stmt_info
,
13295 NULL
, NULL
, node
, cost_vec
)
13296 || vectorizable_condition (vinfo
, stmt_info
,
13297 NULL
, NULL
, node
, cost_vec
)
13298 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13300 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
)
13301 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13307 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13310 return opt_result::failure_at (stmt_info
->stmt
,
13312 " relevant stmt not supported: %G",
13315 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
13316 need extra handling, except for vectorizable reductions. */
13318 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
13319 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
13320 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
13321 stmt_info
, node
, node_instance
,
13323 return opt_result::failure_at (stmt_info
->stmt
,
13325 " live stmt not supported: %G",
13328 return opt_result::success ();
13332 /* Function vect_transform_stmt.
13334 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13337 vect_transform_stmt (vec_info
*vinfo
,
13338 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
13339 slp_tree slp_node
, slp_instance slp_node_instance
)
13341 bool is_store
= false;
13342 gimple
*vec_stmt
= NULL
;
13345 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
13347 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13349 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
13351 switch (STMT_VINFO_TYPE (stmt_info
))
13353 case type_demotion_vec_info_type
:
13354 case type_promotion_vec_info_type
:
13355 case type_conversion_vec_info_type
:
13356 done
= vectorizable_conversion (vinfo
, stmt_info
,
13357 gsi
, &vec_stmt
, slp_node
, NULL
);
13361 case induc_vec_info_type
:
13362 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
13363 stmt_info
, &vec_stmt
, slp_node
,
13368 case shift_vec_info_type
:
13369 done
= vectorizable_shift (vinfo
, stmt_info
,
13370 gsi
, &vec_stmt
, slp_node
, NULL
);
13374 case op_vec_info_type
:
13375 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13380 case assignment_vec_info_type
:
13381 done
= vectorizable_assignment (vinfo
, stmt_info
,
13382 gsi
, &vec_stmt
, slp_node
, NULL
);
13386 case load_vec_info_type
:
13387 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13392 case store_vec_info_type
:
13393 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
13395 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
13396 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
	/* In case of interleaving, the whole chain is vectorized when the
	   last store in the chain is reached.  Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
	   meanwhile.  */
13404 done
= vectorizable_store (vinfo
, stmt_info
,
13405 gsi
, &vec_stmt
, slp_node
, NULL
);
13411 case condition_vec_info_type
:
13412 done
= vectorizable_condition (vinfo
, stmt_info
,
13413 gsi
, &vec_stmt
, slp_node
, NULL
);
13417 case comparison_vec_info_type
:
13418 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13423 case call_vec_info_type
:
13424 done
= vectorizable_call (vinfo
, stmt_info
,
13425 gsi
, &vec_stmt
, slp_node
, NULL
);
13428 case call_simd_clone_vec_info_type
:
13429 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13433 case reduc_vec_info_type
:
13434 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13435 gsi
, &vec_stmt
, slp_node
);
13439 case cycle_phi_info_type
:
13440 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13441 &vec_stmt
, slp_node
, slp_node_instance
);
13445 case lc_phi_info_type
:
13446 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13447 stmt_info
, &vec_stmt
, slp_node
);
13451 case recurr_info_type
:
13452 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13453 stmt_info
, &vec_stmt
, slp_node
, NULL
);
13457 case phi_info_type
:
13458 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
13462 case loop_exit_ctrl_vec_info_type
:
13463 done
= vectorizable_early_exit (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13469 if (!STMT_VINFO_LIVE_P (stmt_info
))
13471 if (dump_enabled_p ())
13472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13473 "stmt not supported.\n");
13474 gcc_unreachable ();
13479 if (!slp_node
&& vec_stmt
)
13480 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
13482 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
13484 /* Handle stmts whose DEF is used outside the loop-nest that is
13485 being vectorized. */
13486 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
13487 slp_node_instance
, true, NULL
);
13492 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */
13502 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
13504 stmt_vec_info next_stmt_info
= first_stmt_info
;
13506 while (next_stmt_info
)
13508 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
13509 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
13510 /* Free the attached stmt_vec_info and remove the stmt. */
13511 vinfo
->remove_stmt (next_stmt_info
);
13512 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.
13520 If NUNITS is zero, return a vector type that contains elements of
13521 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13523 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13524 for this vectorization region and want to "autodetect" the best choice.
13525 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13526 and we want the new type to be interoperable with it. PREVAILING_MODE
13527 in this case can be a scalar integer mode or a vector mode; when it
13528 is a vector mode, the function acts like a tree-level version of
13529 related_vector_mode. */
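/* For illustration (a hypothetical call, not from the original sources):
   on a target whose vectors are 16 bytes wide,

     get_related_vectype_for_scalar_type (V16QImode, integer_type_node, 0)

   asks for the "int" vector type related to V16QImode, i.e. something like
   a 4-element V4SImode vector, while passing NUNITS == 2 would instead
   request a 2-element vector of ints regardless of the prevailing size.  */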
13532 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
13533 tree scalar_type
, poly_uint64 nunits
)
13535 tree orig_scalar_type
= scalar_type
;
13536 scalar_mode inner_mode
;
13537 machine_mode simd_mode
;
13540 if ((!INTEGRAL_TYPE_P (scalar_type
)
13541 && !POINTER_TYPE_P (scalar_type
)
13542 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
13543 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
13544 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
13547 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
13549 /* Interoperability between modes requires one to be a constant multiple
13550 of the other, so that the number of vectors required for each operation
13551 is a compile-time constant. */
13552 if (prevailing_mode
!= VOIDmode
13553 && !constant_multiple_p (nunits
* nbytes
,
13554 GET_MODE_SIZE (prevailing_mode
))
13555 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
13565 if (INTEGRAL_TYPE_P (scalar_type
)
13566 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
13567 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
13568 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
13569 TYPE_UNSIGNED (scalar_type
));
13571 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13572 When the component mode passes the above test simply use a type
13573 corresponding to that mode. The theory is that any use that
13574 would cause problems with this will disable vectorization anyway. */
13575 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
13576 && !INTEGRAL_TYPE_P (scalar_type
))
13577 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
13579 /* We can't build a vector type of elements with alignment bigger than
13581 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
13582 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
13583 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
13587 if (scalar_type
== NULL_TREE
)
13590 /* If no prevailing mode was supplied, use the mode the target prefers.
13591 Otherwise lookup a vector mode based on the prevailing mode. */
13592 if (prevailing_mode
== VOIDmode
)
13594 gcc_assert (known_eq (nunits
, 0U));
13595 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
13596 if (SCALAR_INT_MODE_P (simd_mode
))
13598 /* Traditional behavior is not to take the integer mode
13599 literally, but simply to use it as a way of determining
13600 the vector size. It is up to mode_for_vector to decide
13601 what the TYPE_MODE should be.
13603 Note that nunits == 1 is allowed in order to support single
13604 element vector types. */
13605 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
13606 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13610 else if (SCALAR_INT_MODE_P (prevailing_mode
)
13611 || !related_vector_mode (prevailing_mode
,
13612 inner_mode
, nunits
).exists (&simd_mode
))
13614 /* Fall back to using mode_for_vector, mostly in the hope of being
13615 able to use an integer mode. */
13616 if (known_eq (nunits
, 0U)
13617 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
13620 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13624 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
13626 /* In cases where the mode was chosen by mode_for_vector, check that
13627 the target actually supports the chosen mode, or that it at least
13628 allows the vector mode to be replaced by a like-sized integer. */
13629 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
13630 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
13633 /* Re-attach the address-space qualifier if we canonicalized the scalar
13635 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
13636 return build_qualified_type
13637 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
13642 /* Function get_vectype_for_scalar_type.
13644 Returns the vector type corresponding to SCALAR_TYPE as supported
13645 by the target. If GROUP_SIZE is nonzero and we're performing BB
13646 vectorization, make sure that the number of elements in the vector
13647 is no bigger than GROUP_SIZE. */
13650 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13651 unsigned int group_size
)
13653 /* For BB vectorization, we should always have a group size once we've
13654 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13655 are tentative requests during things like early data reference
13656 analysis and pattern recognition. */
13657 if (is_a
<bb_vec_info
> (vinfo
))
13658 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
13662 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13664 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
13665 vinfo
->vector_mode
= TYPE_MODE (vectype
);
13667 /* Register the natural choice of vector type, before the group size
13668 has been applied. */
13670 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
13672 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13673 try again with an explicit number of elements. */
13676 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
13678 /* Start with the biggest number of units that fits within
13679 GROUP_SIZE and halve it until we find a valid vector type.
13680 Usually either the first attempt will succeed or all will
13681 fail (in the latter case because GROUP_SIZE is too small
13682 for the target), but it's possible that a target could have
13683 a hole between supported vector types.
13685 If GROUP_SIZE is not a power of 2, this has the effect of
13686 trying the largest power of 2 that fits within the group,
13687 even though the group is not a multiple of that vector size.
13688 The BB vectorizer will then try to carve up the group into
13690 unsigned int nunits
= 1 << floor_log2 (group_size
);
13693 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13694 scalar_type
, nunits
);
13697 while (nunits
> 1 && !vectype
);
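  /* Illustrative example (not from the original sources): with a BB group
     of 6 ints, the loop above starts at nunits = 4 (the largest power of
     two not exceeding the group size) and halves towards 1 until
     get_related_vectype_for_scalar_type returns a supported type, e.g. a
     4-element int vector on a target with 128-bit vectors.  */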
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
                               unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   NODE, if nonnull, is the SLP tree node that will use the returned
   vector type.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
                               slp_tree node)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
                                              scalar_type, nunits);
}
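/* For instance (purely illustrative): given SCALAR_TYPE of int and a
   VECTOR_TYPE whose mode is 16 bytes wide, NUNITS computes to 4 and the
   call above asks for a 4-element int vector in that same 16-byte mode,
   assuming the target supports it.  */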
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
        || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;

  return true;
}
13789 /* Function vect_is_simple_use.
13792 VINFO - the vect info of the loop or basic block that is being vectorized.
13793 OPERAND - operand in the loop or bb.
13795 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13796 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13797 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13798 the definition could be anywhere in the function
13799 DT - the type of definition
13801 Returns whether a stmt with OPERAND can be vectorized.
13802 For loops, supportable operands are constants, loop invariants, and operands
13803 that are defined by the current iteration of the loop. Unsupportable
13804 operands are those that are defined by a previous iteration of the loop (as
13805 is the case in reduction/induction computations).
13806 For basic blocks, supportable operands are constants and bb invariants.
13807 For now, operands defined outside the basic block are not supported. */
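/* As a rough illustration (not part of the original comment): in a loop
   computing a[i] = b[i] + x, the use of b[i]'s loaded value is
   vect_internal_def, the loop-invariant x is vect_external_def, and a
   literal constant operand would be vect_constant_def.  */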
13810 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13811 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
13813 if (def_stmt_info_out
)
13814 *def_stmt_info_out
= NULL
;
13816 *def_stmt_out
= NULL
;
13817 *dt
= vect_unknown_def_type
;
13819 if (dump_enabled_p ())
13821 dump_printf_loc (MSG_NOTE
, vect_location
,
13822 "vect_is_simple_use: operand ");
13823 if (TREE_CODE (operand
) == SSA_NAME
13824 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
13825 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
13827 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
13830 if (CONSTANT_CLASS_P (operand
))
13831 *dt
= vect_constant_def
;
13832 else if (is_gimple_min_invariant (operand
))
13833 *dt
= vect_external_def
;
13834 else if (TREE_CODE (operand
) != SSA_NAME
)
13835 *dt
= vect_unknown_def_type
;
13836 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
13837 *dt
= vect_external_def
;
13840 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
13841 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
13843 *dt
= vect_external_def
;
13846 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
13847 def_stmt
= stmt_vinfo
->stmt
;
13848 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
13849 if (def_stmt_info_out
)
13850 *def_stmt_info_out
= stmt_vinfo
;
13853 *def_stmt_out
= def_stmt
;
13856 if (dump_enabled_p ())
13858 dump_printf (MSG_NOTE
, ", type of def: ");
13861 case vect_uninitialized_def
:
13862 dump_printf (MSG_NOTE
, "uninitialized\n");
13864 case vect_constant_def
:
13865 dump_printf (MSG_NOTE
, "constant\n");
13867 case vect_external_def
:
13868 dump_printf (MSG_NOTE
, "external\n");
13870 case vect_internal_def
:
13871 dump_printf (MSG_NOTE
, "internal\n");
13873 case vect_induction_def
:
13874 dump_printf (MSG_NOTE
, "induction\n");
13876 case vect_reduction_def
:
13877 dump_printf (MSG_NOTE
, "reduction\n");
13879 case vect_double_reduction_def
:
13880 dump_printf (MSG_NOTE
, "double reduction\n");
13882 case vect_nested_cycle
:
13883 dump_printf (MSG_NOTE
, "nested cycle\n");
13885 case vect_first_order_recurrence
:
13886 dump_printf (MSG_NOTE
, "first order recurrence\n");
13888 case vect_condition_def
:
13889 dump_printf (MSG_NOTE
, "control flow\n");
13891 case vect_unknown_def_type
:
13892 dump_printf (MSG_NOTE
, "unknown\n");
13897 if (*dt
== vect_unknown_def_type
)
13899 if (dump_enabled_p ())
13900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13901 "Unsupported pattern.\n");
13908 /* Function vect_is_simple_use.
13910 Same as vect_is_simple_use but also determines the vector operand
13911 type of OPERAND and stores it to *VECTYPE. If the definition of
13912 OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
13918 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13919 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
13920 gimple
**def_stmt_out
)
13922 stmt_vec_info def_stmt_info
;
13924 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
13928 *def_stmt_out
= def_stmt
;
13929 if (def_stmt_info_out
)
13930 *def_stmt_info_out
= def_stmt_info
;
13932 /* Now get a vector type if the def is internal, otherwise supply
13933 NULL_TREE and leave it up to the caller to figure out a proper
13934 type for the use stmt. */
13935 if (*dt
== vect_internal_def
13936 || *dt
== vect_induction_def
13937 || *dt
== vect_reduction_def
13938 || *dt
== vect_double_reduction_def
13939 || *dt
== vect_nested_cycle
13940 || *dt
== vect_first_order_recurrence
)
13942 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
13943 gcc_assert (*vectype
!= NULL_TREE
);
13944 if (dump_enabled_p ())
13945 dump_printf_loc (MSG_NOTE
, vect_location
,
13946 "vect_is_simple_use: vectype %T\n", *vectype
);
13948 else if (*dt
== vect_uninitialized_def
13949 || *dt
== vect_constant_def
13950 || *dt
== vect_external_def
)
13951 *vectype
= NULL_TREE
;
13953 gcc_unreachable ();
13958 /* Function vect_is_simple_use.
13960 Same as vect_is_simple_use but determines the operand by operand
13961 position OPERAND from either STMT or SLP_NODE, filling in *OP
13962 and *SLP_DEF (when SLP_NODE is not NULL). */
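/* Illustrative note: when SLP_NODE is given, operand I is taken from the
   I'th SLP child and *SLP_DEF is set to that child; otherwise the operand
   is read directly from STMT's gimple statement, with the special cases
   below for operands embedded in COND_EXPR and VIEW_CONVERT_EXPR.  */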
13965 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
13966 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
13967 enum vect_def_type
*dt
,
13968 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
13972 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
13974 *vectype
= SLP_TREE_VECTYPE (child
);
13975 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
13977 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
13978 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
13982 if (def_stmt_info_out
)
13983 *def_stmt_info_out
= NULL
;
13984 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
13985 *dt
= SLP_TREE_DEF_TYPE (child
);
13992 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
13994 if (gimple_assign_rhs_code (ass
) == COND_EXPR
13995 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
13998 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
14000 *op
= gimple_op (ass
, operand
);
14002 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
14003 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
14005 *op
= gimple_op (ass
, operand
+ 1);
14007 else if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
->stmt
))
14008 *op
= gimple_op (cond
, operand
);
14009 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
14010 *op
= gimple_call_arg (call
, operand
);
14012 gcc_unreachable ();
14013 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype)
      && SLP_TREE_DEF_TYPE (op) == vect_external_def)
    return false;
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
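/* Typical usage (a sketch, not lifted from this file's callers): a
   vectorizable_* routine obtains each child operand via vect_is_simple_use
   with an SLP_DEF output and then calls vect_maybe_update_slp_op_vectype
   on that child with the vector type it intends to use, backing out if the
   call returns false because of a conflicting earlier choice.  */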
14037 /* Function supportable_widening_operation
14039 Check whether an operation represented by the code CODE is a
14040 widening operation that is supported by the target platform in
14041 vector form (i.e., when operating on arguments of type VECTYPE_IN
14042 producing a result of type VECTYPE_OUT).
14044 Widening operations we currently support are NOP (CONVERT), FLOAT,
14045 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14046 are supported by the target platform either directly (via vector
14047 tree-codes), or via target builtins.
14050 - CODE1 and CODE2 are codes of vector operations to be used when
14051 vectorizing the operation, if available.
14052 - MULTI_STEP_CVT determines the number of required intermediate steps in
14053 case of multi-step conversion (like char->short->int - in that case
14054 MULTI_STEP_CVT will be 1).
14055 - INTERM_TYPES contains the intermediate type required to perform the
14056 widening operation (short in the above example). */
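/* Illustrative example (assumed, not from the original comment): widening
   a V16QI input to V8HI results needs two output vectors per input vector,
   so *CODE1/*CODE2 are set to a lo/hi (or even/odd) pair and the caller
   emits both statements; a char -> int conversion additionally goes
   through short, giving *MULTI_STEP_CVT == 1 with the short vector type
   recorded in *INTERM_TYPES.  */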
14059 supportable_widening_operation (vec_info
*vinfo
,
14061 stmt_vec_info stmt_info
,
14062 tree vectype_out
, tree vectype_in
,
14063 code_helper
*code1
,
14064 code_helper
*code2
,
14065 int *multi_step_cvt
,
14066 vec
<tree
> *interm_types
)
14068 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
14069 class loop
*vect_loop
= NULL
;
14070 machine_mode vec_mode
;
14071 enum insn_code icode1
, icode2
;
14072 optab optab1
= unknown_optab
, optab2
= unknown_optab
;
14073 tree vectype
= vectype_in
;
14074 tree wide_vectype
= vectype_out
;
14075 tree_code c1
= MAX_TREE_CODES
, c2
= MAX_TREE_CODES
;
14077 tree prev_type
, intermediate_type
;
14078 machine_mode intermediate_mode
, prev_mode
;
14079 optab optab3
, optab4
;
14081 *multi_step_cvt
= 0;
14083 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
14085 switch (code
.safe_as_tree_code ())
14087 case MAX_TREE_CODES
:
14088 /* Don't set c1 and c2 if code is not a tree_code. */
14091 case WIDEN_MULT_EXPR
:
14092 /* The result of a vectorized widening operation usually requires
14093 two vectors (because the widened results do not fit into one vector).
14094 The generated vector results would normally be expected to be
14095 generated in the same order as in the original scalar computation,
14096 i.e. if 8 results are generated in each vector iteration, they are
14097 to be organized as follows:
14098 vect1: [res1,res2,res3,res4],
14099 vect2: [res5,res6,res7,res8].
14101 However, in the special case that the result of the widening
14102 operation is used in a reduction computation only, the order doesn't
14103 matter (because when vectorizing a reduction we change the order of
14104 the computation). Some targets can take advantage of this and
14105 generate more efficient code. For example, targets like Altivec,
14106 that support widen_mult using a sequence of {mult_even,mult_odd}
14107 generate the following vectors:
14108 vect1: [res1,res3,res5,res7],
14109 vect2: [res2,res4,res6,res8].
14111 When vectorizing outer-loops, we execute the inner-loop sequentially
14112 (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
14116 /* TODO: Another case in which order doesn't *really* matter is when we
14117 widen and then contract again, e.g. (short)((int)x * y >> 8).
14118 Normally, pack_trunc performs an even/odd permute, whereas the
14119 repack from an even/odd expansion would be an interleave, which
14120 would be significantly simpler for e.g. AVX2. */
14121 /* In any case, in order to avoid duplicating the code below, recurse
14122 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14123 are properly set up for the caller. If we fail, we'll continue with
14124 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14126 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
14127 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
14128 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
14129 stmt_info
, vectype_out
,
14131 code2
, multi_step_cvt
,
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
14139 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
14140 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
14142 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
14145 c1
= VEC_WIDEN_MULT_LO_EXPR
;
14146 c2
= VEC_WIDEN_MULT_HI_EXPR
;
14149 case DOT_PROD_EXPR
:
14150 c1
= DOT_PROD_EXPR
;
14151 c2
= DOT_PROD_EXPR
;
14159 case VEC_WIDEN_MULT_EVEN_EXPR
:
14160 /* Support the recursion induced just above. */
14161 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
14162 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
14165 case WIDEN_LSHIFT_EXPR
:
14166 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
14167 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
14171 c1
= VEC_UNPACK_LO_EXPR
;
14172 c2
= VEC_UNPACK_HI_EXPR
;
14176 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
14177 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
14180 case FIX_TRUNC_EXPR
:
14181 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
14182 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
14186 gcc_unreachable ();
14189 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
14190 std::swap (c1
, c2
);
14192 if (code
== FIX_TRUNC_EXPR
)
14194 /* The signedness is determined from output operand. */
14195 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14196 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
14198 else if (CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ())
14199 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
14200 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14201 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
14202 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
14204 /* If the input and result modes are the same, a different optab
14205 is needed where we pass in the number of units in vectype. */
14206 optab1
= vec_unpacks_sbool_lo_optab
;
14207 optab2
= vec_unpacks_sbool_hi_optab
;
14210 vec_mode
= TYPE_MODE (vectype
);
14211 if (widening_fn_p (code
))
14213 /* If this is an internal fn then we must check whether the target
14214 supports either a low-high split or an even-odd split. */
14215 internal_fn ifn
= as_internal_fn ((combined_fn
) code
);
14217 internal_fn lo
, hi
, even
, odd
;
14218 lookup_hilo_internal_fn (ifn
, &lo
, &hi
);
14219 *code1
= as_combined_fn (lo
);
14220 *code2
= as_combined_fn (hi
);
14221 optab1
= direct_internal_fn_optab (lo
, {vectype
, vectype
});
14222 optab2
= direct_internal_fn_optab (hi
, {vectype
, vectype
});
14224 /* If we don't support low-high, then check for even-odd. */
14226 || (icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
14228 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
14230 lookup_evenodd_internal_fn (ifn
, &even
, &odd
);
14231 *code1
= as_combined_fn (even
);
14232 *code2
= as_combined_fn (odd
);
14233 optab1
= direct_internal_fn_optab (even
, {vectype
, vectype
});
14234 optab2
= direct_internal_fn_optab (odd
, {vectype
, vectype
});
14237 else if (code
.is_tree_code ())
14239 if (code
== FIX_TRUNC_EXPR
)
14241 /* The signedness is determined from output operand. */
14242 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14243 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
14245 else if (CONVERT_EXPR_CODE_P ((tree_code
) code
.safe_as_tree_code ())
14246 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
14247 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14248 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
14249 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
14251 /* If the input and result modes are the same, a different optab
14252 is needed where we pass in the number of units in vectype. */
14253 optab1
= vec_unpacks_sbool_lo_optab
;
14254 optab2
= vec_unpacks_sbool_hi_optab
;
14258 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14259 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
14265 if (!optab1
|| !optab2
)
14268 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
14269 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
14273 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
14274 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
14276 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
          /* For scalar masks we may have different boolean
             vector types having the same QImode.  Thus we
             add an additional check on the number of elements.  */
14281 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
14282 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
14289 prev_type
= vectype
;
14290 prev_mode
= vec_mode
;
14292 if (!CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ()))
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
14299 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14300 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14302 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14303 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14305 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
14306 else if (VECTOR_MODE_P (intermediate_mode
))
14308 tree intermediate_element_type
14309 = lang_hooks
.types
.type_for_mode (GET_MODE_INNER (intermediate_mode
),
14310 TYPE_UNSIGNED (prev_type
));
14312 = build_vector_type_for_mode (intermediate_element_type
,
14313 intermediate_mode
);
14317 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
14318 TYPE_UNSIGNED (prev_type
));
14320 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14321 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14322 && intermediate_mode
== prev_mode
14323 && SCALAR_INT_MODE_P (prev_mode
))
14325 /* If the input and result modes are the same, a different optab
14326 is needed where we pass in the number of units in vectype. */
14327 optab3
= vec_unpacks_sbool_lo_optab
;
14328 optab4
= vec_unpacks_sbool_hi_optab
;
14332 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14333 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
14336 if (!optab3
|| !optab4
14337 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
14338 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14339 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
14340 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
14341 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
14342 == CODE_FOR_nothing
)
14343 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
14344 == CODE_FOR_nothing
))
14347 interm_types
->quick_push (intermediate_type
);
14348 (*multi_step_cvt
)++;
14350 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
14351 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
14353 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14355 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
14356 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
14360 prev_type
= intermediate_type
;
14361 prev_mode
= intermediate_mode
;
14364 interm_types
->release ();
14369 /* Function supportable_narrowing_operation
14371 Check whether an operation represented by the code CODE is a
14372 narrowing operation that is supported by the target platform in
14373 vector form (i.e., when operating on arguments of type VECTYPE_IN
14374 and producing a result of type VECTYPE_OUT).
14376 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14377 and FLOAT. This function checks if these operations are supported by
14378 the target platform directly via vector tree-codes.
14381 - CODE1 is the code of a vector operation to be used when
14382 vectorizing the operation, if available.
14383 - MULTI_STEP_CVT determines the number of required intermediate steps in
14384 case of multi-step conversion (like int->short->char - in that case
14385 MULTI_STEP_CVT will be 1).
14386 - INTERM_TYPES contains the intermediate type required to perform the
14387 narrowing operation (short in the above example). */
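/* Illustrative example (assumed): narrowing V4SI to V16QI goes through
   V8HI, so *CODE1 would be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT would be 1
   and *INTERM_TYPES would hold the V8HI type.  */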
14390 supportable_narrowing_operation (code_helper code
,
14391 tree vectype_out
, tree vectype_in
,
14392 code_helper
*code1
, int *multi_step_cvt
,
14393 vec
<tree
> *interm_types
)
14395 machine_mode vec_mode
;
14396 enum insn_code icode1
;
14397 optab optab1
, interm_optab
;
14398 tree vectype
= vectype_in
;
14399 tree narrow_vectype
= vectype_out
;
14401 tree intermediate_type
, prev_type
;
14402 machine_mode intermediate_mode
, prev_mode
;
14404 unsigned HOST_WIDE_INT n_elts
;
14407 if (!code
.is_tree_code ())
14410 *multi_step_cvt
= 0;
14411 switch ((tree_code
) code
)
14414 c1
= VEC_PACK_TRUNC_EXPR
;
14415 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
14416 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14417 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
14418 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
14419 && n_elts
< BITS_PER_UNIT
)
14420 optab1
= vec_pack_sbool_trunc_optab
;
14422 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14425 case FIX_TRUNC_EXPR
:
14426 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
14427 /* The signedness is determined from output operand. */
14428 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14432 c1
= VEC_PACK_FLOAT_EXPR
;
14433 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14437 gcc_unreachable ();
14443 vec_mode
= TYPE_MODE (vectype
);
14444 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
14449 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14451 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
          /* For scalar masks we may have different boolean
             vector types having the same QImode.  Thus we
             add an additional check on the number of elements.  */
14456 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
14457 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14461 if (code
== FLOAT_EXPR
)
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
14466 prev_mode
= vec_mode
;
14467 prev_type
= vectype
;
14468 if (code
== FIX_TRUNC_EXPR
)
14469 uns
= TYPE_UNSIGNED (vectype_out
);
14471 uns
= TYPE_UNSIGNED (vectype
);
14473 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14474 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14475 costly than signed. */
14476 if (code
== FIX_TRUNC_EXPR
&& uns
)
14478 enum insn_code icode2
;
14481 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
14483 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14484 if (interm_optab
!= unknown_optab
14485 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
14486 && insn_data
[icode1
].operand
[0].mode
14487 == insn_data
[icode2
].operand
[0].mode
)
14490 optab1
= interm_optab
;
14495 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14496 intermediate steps in promotion sequence. We try
14497 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14498 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14499 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14501 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14502 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14504 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
14507 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
14508 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14509 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14510 && SCALAR_INT_MODE_P (prev_mode
)
14511 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
14512 && n_elts
< BITS_PER_UNIT
)
14513 interm_optab
= vec_pack_sbool_trunc_optab
;
14516 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
14519 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
14520 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14521 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
14522 == CODE_FOR_nothing
))
14525 interm_types
->quick_push (intermediate_type
);
14526 (*multi_step_cvt
)++;
14528 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14530 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14532 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
14533 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14537 prev_mode
= intermediate_mode
;
14538 prev_type
= intermediate_type
;
14539 optab1
= interm_optab
;
14542 interm_types
->release ();
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
                tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
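/* Example of the masks produced above (illustrative only): with
   START_INDEX 0, END_INDEX 3 and an 8-lane MASK_TYPE, vect_gen_while
   yields {1, 1, 1, 0, 0, 0, 0, 0} while vect_gen_while_not yields the
   complement {0, 0, 0, 1, 1, 1, 1, 1}.  */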
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.
14590 - Set *STMT_VECTYPE_OUT to:
14591 - NULL_TREE if the statement doesn't need to be vectorized;
14592 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14594 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14595 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14596 statement does not help to determine the overall number of units. */
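/* Illustrative note (not from the original comment): for a statement such
   as an int store fed by widened chars, *STMT_VECTYPE_OUT reflects the
   int vector type of the statement itself, while *NUNITS_VECTYPE_OUT is
   derived from the smallest scalar type involved (char here) and so may
   have more lanes; the subparts check below requires the two to be
   compatible multiples.  */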
14599 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
14600 tree
*stmt_vectype_out
,
14601 tree
*nunits_vectype_out
,
14602 unsigned int group_size
)
14604 gimple
*stmt
= stmt_info
->stmt
;
14606 /* For BB vectorization, we should always have a group size once we've
14607 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14608 are tentative requests during things like early data reference
14609 analysis and pattern recognition. */
14610 if (is_a
<bb_vec_info
> (vinfo
))
14611 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
14615 *stmt_vectype_out
= NULL_TREE
;
14616 *nunits_vectype_out
= NULL_TREE
;
14618 if (gimple_get_lhs (stmt
) == NULL_TREE
14619 /* Allow vector conditionals through here. */
14620 && !is_a
<gcond
*> (stmt
)
14621 /* MASK_STORE has no lhs, but is ok. */
14622 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14624 if (is_a
<gcall
*> (stmt
))
14626 /* Ignore calls with no lhs. These must be calls to
14627 #pragma omp simd functions, and what vectorization factor
14628 it really needs can't be determined until
14629 vectorizable_simd_clone_call. */
14630 if (dump_enabled_p ())
14631 dump_printf_loc (MSG_NOTE
, vect_location
,
14632 "defer to SIMD clone analysis.\n");
14633 return opt_result::success ();
14636 return opt_result::failure_at (stmt
,
14637 "not vectorized: irregular stmt: %G", stmt
);
14641 tree scalar_type
= NULL_TREE
;
14642 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
14644 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
14645 if (dump_enabled_p ())
14646 dump_printf_loc (MSG_NOTE
, vect_location
,
14647 "precomputed vectype: %T\n", vectype
);
14649 else if (vect_use_mask_type_p (stmt_info
))
14651 unsigned int precision
= stmt_info
->mask_precision
;
14652 scalar_type
= build_nonstandard_integer_type (precision
, 1);
14653 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
14655 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
14656 " data-type %T\n", scalar_type
);
14657 if (dump_enabled_p ())
14658 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
14662 /* If we got here with a gcond it means that the target had no available vector
14663 mode for the scalar type. We can't vectorize so abort. */
14664 if (is_a
<gcond
*> (stmt
))
14665 return opt_result::failure_at (stmt
,
14667 " unsupported data-type for gcond %T\n",
14670 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
14671 scalar_type
= TREE_TYPE (DR_REF (dr
));
14672 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14673 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
14675 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
14677 if (dump_enabled_p ())
14680 dump_printf_loc (MSG_NOTE
, vect_location
,
14681 "get vectype for scalar type (group size %d):"
14682 " %T\n", group_size
, scalar_type
);
14684 dump_printf_loc (MSG_NOTE
, vect_location
,
14685 "get vectype for scalar type: %T\n", scalar_type
);
14687 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
14689 return opt_result::failure_at (stmt
,
14691 " unsupported data-type %T\n",
14694 if (dump_enabled_p ())
14695 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
14698 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
14699 return opt_result::failure_at (stmt
,
14700 "not vectorized: vector stmt in loop:%G",
14703 *stmt_vectype_out
= vectype
;
14705 /* Don't try to compute scalar types if the stmt produces a boolean
14706 vector; use the existing vector type instead. */
14707 tree nunits_vectype
= vectype
;
14708 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14710 /* The number of units is set according to the smallest scalar
14711 type (or the largest vector size, but we only support one
14712 vector size per vectorization). */
14713 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
14714 TREE_TYPE (vectype
));
14715 if (scalar_type
!= TREE_TYPE (vectype
))
14717 if (dump_enabled_p ())
14718 dump_printf_loc (MSG_NOTE
, vect_location
,
14719 "get vectype for smallest scalar type: %T\n",
14721 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
14723 if (!nunits_vectype
)
14724 return opt_result::failure_at
14725 (stmt
, "not vectorized: unsupported data-type %T\n",
14727 if (dump_enabled_p ())
14728 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
14733 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
14734 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
14735 return opt_result::failure_at (stmt
,
14736 "Not vectorized: Incompatible number "
14737 "of vector subparts between %T and %T\n",
14738 nunits_vectype
, *stmt_vectype_out
);
14740 if (dump_enabled_p ())
14742 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
14743 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
14744 dump_printf (MSG_NOTE
, "\n");
14747 *nunits_vectype_out
= nunits_vectype
;
14748 return opt_result::success ();
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
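/* Worked example (illustrative): with START_INDEX 16, END_INDEX 21 and
   LEN_LIMIT 16, the sequence computes min = 16, left_len = 5 and rhs = 5,
   so LEN is set to the 5 remaining elements; a full iteration with
   END_INDEX - START_INDEX >= 16 would instead clamp LEN to 16.  */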
gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);