1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
67 stmt_vectype (class _stmt_vec_info
*stmt_info
)
69 return STMT_VINFO_VECTYPE (stmt_info
);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
75 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
77 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
78 basic_block bb
= gimple_bb (stmt
);
79 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
85 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
87 return (bb
->loop_father
== loop
->inner
);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
95 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
96 enum vect_cost_for_stmt kind
,
97 stmt_vec_info stmt_info
, slp_tree node
,
98 tree vectype
, int misalign
,
99 enum vect_cost_model_location where
)
101 if ((kind
== vector_load
|| kind
== unaligned_load
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_gather_load
;
104 if ((kind
== vector_store
|| kind
== unaligned_store
)
105 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
106 kind
= vector_scatter_store
;
108 stmt_info_for_cost si
109 = { count
, kind
, where
, stmt_info
, node
, vectype
, misalign
};
110 body_cost_vec
->safe_push (si
);
113 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
117 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
118 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
119 tree vectype
, int misalign
,
120 enum vect_cost_model_location where
)
122 return record_stmt_cost (body_cost_vec
, count
, kind
, stmt_info
, NULL
,
123 vectype
, misalign
, where
);
127 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
128 enum vect_cost_for_stmt kind
, slp_tree node
,
129 tree vectype
, int misalign
,
130 enum vect_cost_model_location where
)
132 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, node
,
133 vectype
, misalign
, where
);
137 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
138 enum vect_cost_for_stmt kind
,
139 enum vect_cost_model_location where
)
141 gcc_assert (kind
== cond_branch_taken
|| kind
== cond_branch_not_taken
142 || kind
== scalar_stmt
);
143 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, NULL
,
144 NULL_TREE
, 0, where
);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
150 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
152 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
162 read_vector_array (vec_info
*vinfo
,
163 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
164 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
166 tree vect_type
, vect
, vect_name
, array_ref
;
169 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
170 vect_type
= TREE_TYPE (TREE_TYPE (array
));
171 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
172 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
173 build_int_cst (size_type_node
, n
),
174 NULL_TREE
, NULL_TREE
);
176 new_stmt
= gimple_build_assign (vect
, array_ref
);
177 vect_name
= make_ssa_name (vect
, new_stmt
);
178 gimple_assign_set_lhs (new_stmt
, vect_name
);
179 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
189 write_vector_array (vec_info
*vinfo
,
190 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
191 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
196 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
197 build_int_cst (size_type_node
, n
),
198 NULL_TREE
, NULL_TREE
);
200 new_stmt
= gimple_build_assign (array_ref
, vect
);
201 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
209 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
213 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
223 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
224 gimple_stmt_iterator
*gsi
, tree var
)
226 tree clobber
= build_clobber (TREE_TYPE (var
));
227 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
228 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
238 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
239 enum vect_relevant relevant
, bool live_p
)
241 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
242 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE
, vect_location
,
246 "mark relevant %d, live %d: %G", relevant
, live_p
,
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE
, vect_location
,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info
= stmt_info
;
266 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
268 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
269 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
271 if (live_p
&& relevant
== vect_unused_in_scope
)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE
, vect_location
,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
277 relevant
= vect_used_only_live
;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE
, vect_location
,
282 "mark relevant %d, live %d: %G", relevant
, live_p
,
286 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
287 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
288 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
290 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE
, vect_location
,
295 "already marked relevant/live.\n");
299 worklist
->safe_push (stmt_info
);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
309 loop_vec_info loop_vinfo
)
314 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
318 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
320 enum vect_def_type dt
= vect_uninitialized_def
;
322 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "use not simple.\n");
330 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
345 - it is an induction and we have multiple exits.
347 CHECKME: what other side effects would the vectorizer allow? */
350 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
351 enum vect_relevant
*relevant
, bool *live_p
)
353 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
355 imm_use_iterator imm_iter
;
359 *relevant
= vect_unused_in_scope
;
362 /* cond stmt other than loop exit cond. */
363 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
364 if (is_ctrl_stmt (stmt
)
365 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo
) != stmt
366 && (!loop
->inner
|| gimple_bb (stmt
)->loop_father
== loop
))
367 *relevant
= vect_used_in_scope
;
369 /* changing memory. */
370 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
371 if (gimple_vdef (stmt_info
->stmt
)
372 && !gimple_clobber_p (stmt_info
->stmt
))
374 if (dump_enabled_p ())
375 dump_printf_loc (MSG_NOTE
, vect_location
,
376 "vec_stmt_relevant_p: stmt has vdefs.\n");
377 *relevant
= vect_used_in_scope
;
380 /* uses outside the loop. */
381 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
383 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
385 basic_block bb
= gimple_bb (USE_STMT (use_p
));
386 if (!flow_bb_inside_loop_p (loop
, bb
))
388 if (is_gimple_debug (USE_STMT (use_p
)))
391 if (dump_enabled_p ())
392 dump_printf_loc (MSG_NOTE
, vect_location
,
393 "vec_stmt_relevant_p: used out of loop.\n");
395 /* We expect all such uses to be in the loop exit phis
396 (because of loop closed form) */
397 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
404 /* Check if it's an induction and multiple exits. In this case there will be
405 a usage later on after peeling which is needed for the alternate exit. */
406 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
407 && STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
)
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE
, vect_location
,
411 "vec_stmt_relevant_p: induction forced for "
417 if (*live_p
&& *relevant
== vect_unused_in_scope
418 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_NOTE
, vect_location
,
422 "vec_stmt_relevant_p: stmt live but not relevant.\n");
423 *relevant
= vect_used_only_live
;
426 return (*live_p
|| *relevant
);
430 /* Function exist_non_indexing_operands_for_use_p
432 USE is one of the uses attached to STMT_INFO. Check if USE is
433 used in STMT_INFO for anything other than indexing an array. */
436 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
440 /* USE corresponds to some operand in STMT. If there is no data
441 reference in STMT, then any operand that corresponds to USE
442 is not indexing an array. */
443 if (!STMT_VINFO_DATA_REF (stmt_info
))
446 /* STMT has a data_ref. FORNOW this means that its of one of
450 (This should have been verified in analyze_data_refs).
452 'var' in the second case corresponds to a def, not a use,
453 so USE cannot correspond to any operands that are not used
456 Therefore, all we need to check is if STMT falls into the
457 first case, and whether var corresponds to USE. */
459 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
460 if (!assign
|| !gimple_assign_copy_p (assign
))
462 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
463 if (call
&& gimple_call_internal_p (call
))
465 internal_fn ifn
= gimple_call_internal_fn (call
);
466 int mask_index
= internal_fn_mask_index (ifn
);
468 && use
== gimple_call_arg (call
, mask_index
))
470 int stored_value_index
= internal_fn_stored_value_index (ifn
);
471 if (stored_value_index
>= 0
472 && use
== gimple_call_arg (call
, stored_value_index
))
474 if (internal_gather_scatter_fn_p (ifn
)
475 && use
== gimple_call_arg (call
, 1))
481 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
483 operand
= gimple_assign_rhs1 (assign
);
484 if (TREE_CODE (operand
) != SSA_NAME
)
495 Function process_use.
498 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
499 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
500 that defined USE. This is done by calling mark_relevant and passing it
501 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
502 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
506 Generally, LIVE_P and RELEVANT are used to define the liveness and
507 relevance info of the DEF_STMT of this USE:
508 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
509 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
511 - case 1: If USE is used only for address computations (e.g. array indexing),
512 which does not need to be directly vectorized, then the liveness/relevance
513 of the respective DEF_STMT is left unchanged.
514 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
515 we skip DEF_STMT cause it had already been processed.
516 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
517 "relevant" will be modified accordingly.
519 Return true if everything is as expected. Return false otherwise. */
522 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
523 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
526 stmt_vec_info dstmt_vinfo
;
527 enum vect_def_type dt
;
529 /* case 1: we are only interested in uses that need to be vectorized. Uses
530 that are used for address computation are not considered relevant. */
531 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
532 return opt_result::success ();
534 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
535 return opt_result::failure_at (stmt_vinfo
->stmt
,
537 " unsupported use in stmt.\n");
540 return opt_result::success ();
542 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
543 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
545 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
546 We have to force the stmt live since the epilogue loop needs it to
547 continue computing the reduction. */
548 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
549 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
550 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
551 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
552 && bb
->loop_father
== def_bb
->loop_father
)
554 if (dump_enabled_p ())
555 dump_printf_loc (MSG_NOTE
, vect_location
,
556 "reduc-stmt defining reduc-phi in the same nest.\n");
557 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
558 return opt_result::success ();
561 /* case 3a: outer-loop stmt defining an inner-loop stmt:
562 outer-loop-header-bb:
568 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE
, vect_location
,
572 "outer-loop def-stmt defining inner-loop stmt.\n");
576 case vect_unused_in_scope
:
577 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
578 vect_used_in_scope
: vect_unused_in_scope
;
581 case vect_used_in_outer_by_reduction
:
582 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
583 relevant
= vect_used_by_reduction
;
586 case vect_used_in_outer
:
587 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
588 relevant
= vect_used_in_scope
;
591 case vect_used_in_scope
:
599 /* case 3b: inner-loop stmt defining an outer-loop stmt:
600 outer-loop-header-bb:
604 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
606 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
608 if (dump_enabled_p ())
609 dump_printf_loc (MSG_NOTE
, vect_location
,
610 "inner-loop def-stmt defining outer-loop stmt.\n");
614 case vect_unused_in_scope
:
615 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
616 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
617 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
620 case vect_used_by_reduction
:
621 case vect_used_only_live
:
622 relevant
= vect_used_in_outer_by_reduction
;
625 case vect_used_in_scope
:
626 relevant
= vect_used_in_outer
;
633 /* We are also not interested in uses on loop PHI backedges that are
634 inductions. Otherwise we'll needlessly vectorize the IV increment
635 and cause hybrid SLP for SLP inductions. Unless the PHI is live
637 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
638 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
639 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
640 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
641 loop_latch_edge (bb
->loop_father
))
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE
, vect_location
,
646 "induction value on backedge.\n");
647 return opt_result::success ();
651 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
652 return opt_result::success ();
656 /* Function vect_mark_stmts_to_be_vectorized.
658 Not all stmts in the loop need to be vectorized. For example:
667 Stmt 1 and 3 do not need to be vectorized, because loop control and
668 addressing of vectorized data-refs are handled differently.
670 This pass detects such stmts. */
673 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
675 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
676 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
677 unsigned int nbbs
= loop
->num_nodes
;
678 gimple_stmt_iterator si
;
682 enum vect_relevant relevant
;
684 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
686 auto_vec
<stmt_vec_info
, 64> worklist
;
688 /* 1. Init worklist. */
689 for (i
= 0; i
< nbbs
; i
++)
692 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
694 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
699 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
700 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
702 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
704 if (is_gimple_debug (gsi_stmt (si
)))
706 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE
, vect_location
,
709 "init: stmt relevant? %G", stmt_info
->stmt
);
711 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
712 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
716 /* 2. Process_worklist */
717 while (worklist
.length () > 0)
722 stmt_vec_info stmt_vinfo
= worklist
.pop ();
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE
, vect_location
,
725 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
727 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
728 (DEF_STMT) as relevant/irrelevant according to the relevance property
730 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
732 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
733 propagated as is to the DEF_STMTs of its USEs.
735 One exception is when STMT has been identified as defining a reduction
736 variable; in this case we set the relevance to vect_used_by_reduction.
737 This is because we distinguish between two kinds of relevant stmts -
738 those that are used by a reduction computation, and those that are
739 (also) used by a regular computation. This allows us later on to
740 identify stmts that are used solely by a reduction, and therefore the
741 order of the results that they produce does not have to be kept. */
743 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
745 case vect_reduction_def
:
746 gcc_assert (relevant
!= vect_unused_in_scope
);
747 if (relevant
!= vect_unused_in_scope
748 && relevant
!= vect_used_in_scope
749 && relevant
!= vect_used_by_reduction
750 && relevant
!= vect_used_only_live
)
751 return opt_result::failure_at
752 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
755 case vect_nested_cycle
:
756 if (relevant
!= vect_unused_in_scope
757 && relevant
!= vect_used_in_outer_by_reduction
758 && relevant
!= vect_used_in_outer
)
759 return opt_result::failure_at
760 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
763 case vect_double_reduction_def
:
764 if (relevant
!= vect_unused_in_scope
765 && relevant
!= vect_used_by_reduction
766 && relevant
!= vect_used_only_live
)
767 return opt_result::failure_at
768 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
775 if (is_pattern_stmt_p (stmt_vinfo
))
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
782 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
783 tree op
= gimple_assign_rhs1 (assign
);
786 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
789 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
790 loop_vinfo
, relevant
, &worklist
, false);
793 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
794 loop_vinfo
, relevant
, &worklist
, false);
799 for (; i
< gimple_num_ops (assign
); i
++)
801 op
= gimple_op (assign
, i
);
802 if (TREE_CODE (op
) == SSA_NAME
)
805 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
812 else if (gcond
*cond
= dyn_cast
<gcond
*> (stmt_vinfo
->stmt
))
814 tree_code rhs_code
= gimple_cond_code (cond
);
815 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
817 = process_use (stmt_vinfo
, gimple_cond_lhs (cond
),
818 loop_vinfo
, relevant
, &worklist
, false);
821 res
= process_use (stmt_vinfo
, gimple_cond_rhs (cond
),
822 loop_vinfo
, relevant
, &worklist
, false);
826 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
828 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
830 tree arg
= gimple_call_arg (call
, i
);
832 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
842 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
844 tree op
= USE_FROM_PTR (use_p
);
846 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
852 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
854 gather_scatter_info gs_info
;
855 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
858 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
867 } /* while worklist */
869 return opt_result::success ();
872 /* Function vect_model_simple_cost.
874 Models cost for simple operations, i.e. those that only emit ncopies of a
875 single op. Right now, this does not account for multiple insns that could
876 be generated for the single vector op. We will handle that shortly. */
879 vect_model_simple_cost (vec_info
*,
880 stmt_vec_info stmt_info
, int ncopies
,
881 enum vect_def_type
*dt
,
884 stmt_vector_for_cost
*cost_vec
,
885 vect_cost_for_stmt kind
= vector_stmt
)
887 int inside_cost
= 0, prologue_cost
= 0;
889 gcc_assert (cost_vec
!= NULL
);
891 /* ??? Somehow we need to fix this at the callers. */
893 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
896 /* Cost the "broadcast" of a scalar operand in to a vector operand.
897 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
899 for (int i
= 0; i
< ndts
; i
++)
900 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
901 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
902 stmt_info
, 0, vect_prologue
);
904 /* Pass the inside-of-loop statements to the target-specific cost model. */
905 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
906 stmt_info
, 0, vect_body
);
908 if (dump_enabled_p ())
909 dump_printf_loc (MSG_NOTE
, vect_location
,
910 "vect_model_simple_cost: inside_cost = %d, "
911 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
915 /* Model cost for type demotion and promotion operations. PWR is
916 normally zero for single-step promotions and demotions. It will be
917 one if two-step promotion/demotion is required, and so on. NCOPIES
918 is the number of vector results (and thus number of instructions)
919 for the narrowest end of the operation chain. Each additional
920 step doubles the number of instructions required. If WIDEN_ARITH
921 is true the stmt is doing widening arithmetic. */
924 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
925 enum vect_def_type
*dt
,
926 unsigned int ncopies
, int pwr
,
927 stmt_vector_for_cost
*cost_vec
,
931 int inside_cost
= 0, prologue_cost
= 0;
933 for (i
= 0; i
< pwr
+ 1; i
++)
935 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
937 ? vector_stmt
: vec_promote_demote
,
938 stmt_info
, 0, vect_body
);
942 /* FORNOW: Assuming maximum 2 args per stmts. */
943 for (i
= 0; i
< 2; i
++)
944 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
945 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
946 stmt_info
, 0, vect_prologue
);
948 if (dump_enabled_p ())
949 dump_printf_loc (MSG_NOTE
, vect_location
,
950 "vect_model_promotion_demotion_cost: inside_cost = %d, "
951 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
954 /* Returns true if the current function returns DECL. */
957 cfun_returns (tree decl
)
961 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
963 greturn
*ret
= safe_dyn_cast
<greturn
*> (*gsi_last_bb (e
->src
));
966 if (gimple_return_retval (ret
) == decl
)
968 /* We often end up with an aggregate copy to the result decl,
969 handle that case as well. First skip intermediate clobbers
974 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
976 while (gimple_clobber_p (def
));
977 if (is_a
<gassign
*> (def
)
978 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
979 && gimple_assign_rhs1 (def
) == decl
)
985 /* Calculate cost of DR's memory access. */
987 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
988 dr_alignment_support alignment_support_scheme
,
990 unsigned int *inside_cost
,
991 stmt_vector_for_cost
*body_cost_vec
)
993 switch (alignment_support_scheme
)
997 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
998 vector_store
, stmt_info
, 0,
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE
, vect_location
,
1003 "vect_model_store_cost: aligned.\n");
1007 case dr_unaligned_supported
:
1009 /* Here, we assign an additional cost for the unaligned store. */
1010 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1011 unaligned_store
, stmt_info
,
1012 misalignment
, vect_body
);
1013 if (dump_enabled_p ())
1014 dump_printf_loc (MSG_NOTE
, vect_location
,
1015 "vect_model_store_cost: unaligned supported by "
1020 case dr_unaligned_unsupported
:
1022 *inside_cost
= VECT_MAX_COST
;
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1026 "vect_model_store_cost: unsupported access.\n");
1035 /* Calculate cost of DR's memory access. */
1037 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1038 dr_alignment_support alignment_support_scheme
,
1040 bool add_realign_cost
, unsigned int *inside_cost
,
1041 unsigned int *prologue_cost
,
1042 stmt_vector_for_cost
*prologue_cost_vec
,
1043 stmt_vector_for_cost
*body_cost_vec
,
1044 bool record_prologue_costs
)
1046 switch (alignment_support_scheme
)
1050 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1051 stmt_info
, 0, vect_body
);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE
, vect_location
,
1055 "vect_model_load_cost: aligned.\n");
1059 case dr_unaligned_supported
:
1061 /* Here, we assign an additional cost for the unaligned load. */
1062 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1063 unaligned_load
, stmt_info
,
1064 misalignment
, vect_body
);
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_NOTE
, vect_location
,
1068 "vect_model_load_cost: unaligned supported by "
1073 case dr_explicit_realign
:
1075 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1076 vector_load
, stmt_info
, 0, vect_body
);
1077 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1078 vec_perm
, stmt_info
, 0, vect_body
);
1080 /* FIXME: If the misalignment remains fixed across the iterations of
1081 the containing loop, the following cost should be added to the
1083 if (targetm
.vectorize
.builtin_mask_for_load
)
1084 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1085 stmt_info
, 0, vect_body
);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE
, vect_location
,
1089 "vect_model_load_cost: explicit realign\n");
1093 case dr_explicit_realign_optimized
:
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE
, vect_location
,
1097 "vect_model_load_cost: unaligned software "
1100 /* Unaligned software pipeline has a load of an address, an initial
1101 load, and possibly a mask operation to "prime" the loop. However,
1102 if this is an access in a group of loads, which provide grouped
1103 access, then the above cost should only be considered for one
1104 access in the group. Inside the loop, there is a load op
1105 and a realignment op. */
1107 if (add_realign_cost
&& record_prologue_costs
)
1109 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1110 vector_stmt
, stmt_info
,
1112 if (targetm
.vectorize
.builtin_mask_for_load
)
1113 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1114 vector_stmt
, stmt_info
,
1118 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1119 stmt_info
, 0, vect_body
);
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1121 stmt_info
, 0, vect_body
);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE
, vect_location
,
1125 "vect_model_load_cost: explicit realign optimized"
1131 case dr_unaligned_unsupported
:
1133 *inside_cost
= VECT_MAX_COST
;
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1137 "vect_model_load_cost: unsupported access.\n");
1146 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1147 the loop preheader for the vectorized stmt STMT_VINFO. */
1150 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1151 gimple_stmt_iterator
*gsi
)
1154 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1156 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE
, vect_location
,
1160 "created new init_stmt: %G", new_stmt
);
1163 /* Function vect_init_vector.
1165 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1166 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1167 vector type a vector with all elements equal to VAL is created first.
1168 Place the initialization at GSI if it is not NULL. Otherwise, place the
1169 initialization at the loop preheader.
1170 Return the DEF of INIT_STMT.
1171 It will be used in the vectorization of STMT_INFO. */
1174 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1175 gimple_stmt_iterator
*gsi
)
1180 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1181 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1183 gcc_assert (VECTOR_TYPE_P (type
));
1184 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1186 /* Scalar boolean value should be transformed into
1187 all zeros or all ones value before building a vector. */
1188 if (VECTOR_BOOLEAN_TYPE_P (type
))
1190 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1191 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1193 if (CONSTANT_CLASS_P (val
))
1194 val
= integer_zerop (val
) ? false_val
: true_val
;
1197 new_temp
= make_ssa_name (TREE_TYPE (type
));
1198 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1199 val
, true_val
, false_val
);
1200 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1206 gimple_seq stmts
= NULL
;
1207 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1208 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1209 TREE_TYPE (type
), val
);
1211 /* ??? Condition vectorization expects us to do
1212 promotion of invariant/external defs. */
1213 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1214 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1215 !gsi_end_p (gsi2
); )
1217 init_stmt
= gsi_stmt (gsi2
);
1218 gsi_remove (&gsi2
, false);
1219 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1223 val
= build_vector_from_val (type
, val
);
1226 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1227 init_stmt
= gimple_build_assign (new_temp
, val
);
1228 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1233 /* Function vect_get_vec_defs_for_operand.
1235 OP is an operand in STMT_VINFO. This function returns a vector of
1236 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1238 In the case that OP is an SSA_NAME which is defined in the loop, then
1239 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1241 In case OP is an invariant or constant, a new stmt that creates a vector def
1242 needs to be introduced. VECTYPE may be used to specify a required type for
1243 vector invariant. */
1246 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1248 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1251 enum vect_def_type dt
;
1253 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE
, vect_location
,
1257 "vect_get_vec_defs_for_operand: %T\n", op
);
1259 stmt_vec_info def_stmt_info
;
1260 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1261 &def_stmt_info
, &def_stmt
);
1262 gcc_assert (is_simple_use
);
1263 if (def_stmt
&& dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1266 vec_oprnds
->create (ncopies
);
1267 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1269 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1273 vector_type
= vectype
;
1274 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1275 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1276 vector_type
= truth_type_for (stmt_vectype
);
1278 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1280 gcc_assert (vector_type
);
1281 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1283 vec_oprnds
->quick_push (vop
);
1287 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1288 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1289 for (unsigned i
= 0; i
< ncopies
; ++i
)
1290 vec_oprnds
->quick_push (gimple_get_lhs
1291 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1296 /* Get vectorized definitions for OP0 and OP1. */
1299 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1301 tree op0
, tree vectype0
, vec
<tree
> *vec_oprnds0
,
1302 tree op1
, tree vectype1
, vec
<tree
> *vec_oprnds1
,
1303 tree op2
, tree vectype2
, vec
<tree
> *vec_oprnds2
,
1304 tree op3
, tree vectype3
, vec
<tree
> *vec_oprnds3
)
1309 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1311 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1313 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1315 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1320 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1321 op0
, vec_oprnds0
, vectype0
);
1323 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1324 op1
, vec_oprnds1
, vectype1
);
1326 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1327 op2
, vec_oprnds2
, vectype2
);
1329 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1330 op3
, vec_oprnds3
, vectype3
);
1335 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1337 tree op0
, vec
<tree
> *vec_oprnds0
,
1338 tree op1
, vec
<tree
> *vec_oprnds1
,
1339 tree op2
, vec
<tree
> *vec_oprnds2
,
1340 tree op3
, vec
<tree
> *vec_oprnds3
)
1342 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1343 op0
, NULL_TREE
, vec_oprnds0
,
1344 op1
, NULL_TREE
, vec_oprnds1
,
1345 op2
, NULL_TREE
, vec_oprnds2
,
1346 op3
, NULL_TREE
, vec_oprnds3
);
1349 /* Helper function called by vect_finish_replace_stmt and
1350 vect_finish_stmt_generation. Set the location of the new
1351 statement and create and return a stmt_vec_info for it. */
1354 vect_finish_stmt_generation_1 (vec_info
*,
1355 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1357 if (dump_enabled_p ())
1358 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1362 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1364 /* While EH edges will generally prevent vectorization, stmt might
1365 e.g. be in a must-not-throw region. Ensure newly created stmts
1366 that could throw are part of the same region. */
1367 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1368 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1369 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1372 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1375 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1376 which sets the same scalar result as STMT_INFO did. Create and return a
1377 stmt_vec_info for VEC_STMT. */
1380 vect_finish_replace_stmt (vec_info
*vinfo
,
1381 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1383 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1384 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1386 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1387 gsi_replace (&gsi
, vec_stmt
, true);
1389 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1392 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1393 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1396 vect_finish_stmt_generation (vec_info
*vinfo
,
1397 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1398 gimple_stmt_iterator
*gsi
)
1400 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1402 if (!gsi_end_p (*gsi
)
1403 && gimple_has_mem_ops (vec_stmt
))
1405 gimple
*at_stmt
= gsi_stmt (*gsi
);
1406 tree vuse
= gimple_vuse (at_stmt
);
1407 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1409 tree vdef
= gimple_vdef (at_stmt
);
1410 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1411 gimple_set_modified (vec_stmt
, true);
1412 /* If we have an SSA vuse and insert a store, update virtual
1413 SSA form to avoid triggering the renamer. Do so only
1414 if we can easily see all uses - which is what almost always
1415 happens with the way vectorized stmts are inserted. */
1416 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1417 && ((is_gimple_assign (vec_stmt
)
1418 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1419 || (is_gimple_call (vec_stmt
)
1420 && (!(gimple_call_flags (vec_stmt
)
1421 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
))
1422 || (gimple_call_lhs (vec_stmt
)
1423 && !is_gimple_reg (gimple_call_lhs (vec_stmt
)))))))
1425 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1426 gimple_set_vdef (vec_stmt
, new_vdef
);
1427 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1431 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1432 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1435 /* We want to vectorize a call to combined function CFN with function
1436 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1437 as the types of all inputs. Check whether this is possible using
1438 an internal function, returning its code if so or IFN_LAST if not. */
1441 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1442 tree vectype_out
, tree vectype_in
)
1445 if (internal_fn_p (cfn
))
1446 ifn
= as_internal_fn (cfn
);
1448 ifn
= associated_internal_fn (fndecl
);
1449 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1451 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1452 if (info
.vectorizable
)
1454 bool same_size_p
= TYPE_SIZE (vectype_in
) == TYPE_SIZE (vectype_out
);
1455 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1456 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1458 /* The type size of both the vectype_in and vectype_out should be
1459 exactly the same when vectype_out isn't participating the optab.
1460 While there is no restriction for type size when vectype_out
1461 is part of the optab query. */
1462 if (type0
!= vectype_out
&& type1
!= vectype_out
&& !same_size_p
)
1465 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1466 OPTIMIZE_FOR_SPEED
))
1474 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1475 gimple_stmt_iterator
*);
1477 /* Check whether a load or store statement in the loop described by
1478 LOOP_VINFO is possible in a loop using partial vectors. This is
1479 testing whether the vectorizer pass has the appropriate support,
1480 as well as whether the target does.
1482 VLS_TYPE says whether the statement is a load or store and VECTYPE
1483 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1484 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1485 says how the load or store is going to be implemented and GROUP_SIZE
1486 is the number of load or store statements in the containing group.
1487 If the access is a gather load or scatter store, GS_INFO describes
1488 its arguments. If the load or store is conditional, SCALAR_MASK is the
1489 condition under which it occurs.
1491 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1492 vectors is not supported, otherwise record the required rgroup control
1496 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1498 vec_load_store_type vls_type
,
1500 vect_memory_access_type
1502 gather_scatter_info
*gs_info
,
1505 /* Invariant loads need no special support. */
1506 if (memory_access_type
== VMAT_INVARIANT
)
1509 unsigned int nvectors
;
1511 nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1513 nvectors
= vect_get_num_copies (loop_vinfo
, vectype
);
1515 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1516 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1517 machine_mode vecmode
= TYPE_MODE (vectype
);
1518 bool is_load
= (vls_type
== VLS_LOAD
);
1519 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1522 = (is_load
? vect_load_lanes_supported (vectype
, group_size
, true)
1523 : vect_store_lanes_supported (vectype
, group_size
, true));
1524 if (ifn
== IFN_MASK_LEN_LOAD_LANES
|| ifn
== IFN_MASK_LEN_STORE_LANES
)
1525 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1526 else if (ifn
== IFN_MASK_LOAD_LANES
|| ifn
== IFN_MASK_STORE_LANES
)
1527 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " load/store-lanes instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1541 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1543 internal_fn ifn
= (is_load
1544 ? IFN_MASK_GATHER_LOAD
1545 : IFN_MASK_SCATTER_STORE
);
1546 internal_fn len_ifn
= (is_load
1547 ? IFN_MASK_LEN_GATHER_LOAD
1548 : IFN_MASK_LEN_SCATTER_STORE
);
1549 if (internal_gather_scatter_fn_supported_p (len_ifn
, vectype
,
1550 gs_info
->memory_type
,
1551 gs_info
->offset_vectype
,
1553 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1554 else if (internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1555 gs_info
->memory_type
,
1556 gs_info
->offset_vectype
,
1558 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1562 if (dump_enabled_p ())
1563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1564 "can't operate on partial vectors because"
1565 " the target doesn't have an appropriate"
1566 " gather load or scatter store instruction.\n");
1567 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1572 if (memory_access_type
!= VMAT_CONTIGUOUS
1573 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1575 /* Element X of the data must come from iteration i * VF + X of the
1576 scalar loop. We need more work to support other mappings. */
1577 if (dump_enabled_p ())
1578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1579 "can't operate on partial vectors because an"
1580 " access isn't contiguous.\n");
1581 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1585 if (!VECTOR_MODE_P (vecmode
))
1587 if (dump_enabled_p ())
1588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1589 "can't operate on partial vectors when emulating"
1590 " vector operations.\n");
1591 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1595 /* We might load more scalars than we need for permuting SLP loads.
1596 We checked in get_group_load_store_type that the extra elements
1597 don't leak into a new vector. */
1598 auto group_memory_nvectors
= [](poly_uint64 size
, poly_uint64 nunits
)
1600 unsigned int nvectors
;
1601 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1606 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1607 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1608 machine_mode mask_mode
;
1610 bool using_partial_vectors_p
= false;
1611 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1613 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1614 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1615 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1616 using_partial_vectors_p
= true;
1618 else if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1619 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1621 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1622 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1623 using_partial_vectors_p
= true;
1626 if (!using_partial_vectors_p
)
1628 if (dump_enabled_p ())
1629 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1630 "can't operate on partial vectors because the"
1631 " target doesn't have the appropriate partial"
1632 " vectorization load or store.\n");
1633 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1637 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1638 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1639 that needs to be applied to all loads and stores in a vectorized loop.
1640 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1641 otherwise return VEC_MASK & LOOP_MASK.
1643 MASK_TYPE is the type of both masks. If new statements are needed,
1644 insert them before GSI. */
1647 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1648 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1650 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1654 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1656 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
1659 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1660 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1661 vec_mask
, loop_mask
);
1663 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1667 /* Determine whether we can use a gather load or scatter store to vectorize
1668 strided load or store STMT_INFO by truncating the current offset to a
1669 smaller width. We need to be able to construct an offset vector:
1671 { 0, X, X*2, X*3, ... }
1673 without loss of precision, where X is STMT_INFO's DR_STEP.
1675 Return true if this is possible, describing the gather load or scatter
1676 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1679 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1680 loop_vec_info loop_vinfo
, bool masked_p
,
1681 gather_scatter_info
*gs_info
)
1683 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1684 data_reference
*dr
= dr_info
->dr
;
1685 tree step
= DR_STEP (dr
);
1686 if (TREE_CODE (step
) != INTEGER_CST
)
1688 /* ??? Perhaps we could use range information here? */
1689 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_NOTE
, vect_location
,
1691 "cannot truncate variable step.\n");
1695 /* Get the number of bits in an element. */
1696 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1697 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1698 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1700 /* Set COUNT to the upper limit on the number of elements - 1.
1701 Start with the maximum vectorization factor. */
1702 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1704 /* Try lowering COUNT to the number of scalar latch iterations. */
1705 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1706 widest_int max_iters
;
1707 if (max_loop_iterations (loop
, &max_iters
)
1708 && max_iters
< count
)
1709 count
= max_iters
.to_shwi ();
1711 /* Try scales of 1 and the element size. */
1712 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1713 wi::overflow_type overflow
= wi::OVF_NONE
;
1714 for (int i
= 0; i
< 2; ++i
)
1716 int scale
= scales
[i
];
1718 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1721 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1722 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1725 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1726 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1728 /* Find the narrowest viable offset type. */
1729 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1730 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1733 /* See whether the target supports the operation with an offset
1734 no narrower than OFFSET_TYPE. */
1735 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1736 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1737 vectype
, memory_type
, offset_type
, scale
,
1738 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1739 || gs_info
->ifn
== IFN_LAST
)
1742 gs_info
->decl
= NULL_TREE
;
1743 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1744 but we don't need to store that here. */
1745 gs_info
->base
= NULL_TREE
;
1746 gs_info
->element_type
= TREE_TYPE (vectype
);
1747 gs_info
->offset
= fold_convert (offset_type
, step
);
1748 gs_info
->offset_dt
= vect_constant_def
;
1749 gs_info
->scale
= scale
;
1750 gs_info
->memory_type
= memory_type
;
1754 if (overflow
&& dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE
, vect_location
,
1756 "truncating gather/scatter offset to %d bits"
1757 " might change its value.\n", element_bits
);
1762 /* Return true if we can use gather/scatter internal functions to
1763 vectorize STMT_INFO, which is a grouped or strided load or store.
1764 MASKED_P is true if load or store is conditional. When returning
1765 true, fill in GS_INFO with the information required to perform the
1769 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1770 loop_vec_info loop_vinfo
, bool masked_p
,
1771 gather_scatter_info
*gs_info
)
1773 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1774 || gs_info
->ifn
== IFN_LAST
)
1775 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1778 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1779 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1781 gcc_assert (TYPE_PRECISION (new_offset_type
)
1782 >= TYPE_PRECISION (old_offset_type
));
1783 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE
, vect_location
,
1787 "using gather/scatter for strided/grouped access,"
1788 " scale = %d\n", gs_info
->scale
);
1793 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1794 elements with a known constant step. Return -1 if that step
1795 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1798 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
1800 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1801 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
1805 /* If the target supports a permute mask that reverses the elements in
1806 a vector of type VECTYPE, return that mask, otherwise return null. */
1809 perm_mask_for_reverse (tree vectype
)
1811 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1813 /* The encoding has a single stepped pattern. */
1814 vec_perm_builder
sel (nunits
, 1, 3);
1815 for (int i
= 0; i
< 3; ++i
)
1816 sel
.quick_push (nunits
- 1 - i
);
1818 vec_perm_indices
indices (sel
, 1, nunits
);
1819 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
1822 return vect_gen_perm_mask_checked (vectype
, indices
);
1825 /* A subroutine of get_load_store_type, with a subset of the same
1826 arguments. Handle the case where STMT_INFO is a load or store that
1827 accesses consecutive elements with a negative step. Sets *POFFSET
1828 to the offset to be applied to the DR for the first access. */
1830 static vect_memory_access_type
1831 get_negative_load_store_type (vec_info
*vinfo
,
1832 stmt_vec_info stmt_info
, tree vectype
,
1833 vec_load_store_type vls_type
,
1834 unsigned int ncopies
, poly_int64
*poffset
)
1836 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1837 dr_alignment_support alignment_support_scheme
;
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1843 "multiple types with negative step.\n");
1844 return VMAT_ELEMENTWISE
;
1847 /* For backward running DRs the first access in vectype actually is
1848 N-1 elements before the address of the DR. */
1849 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
1850 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
1852 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
1853 alignment_support_scheme
1854 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
1855 if (alignment_support_scheme
!= dr_aligned
1856 && alignment_support_scheme
!= dr_unaligned_supported
)
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1860 "negative step but alignment required.\n");
1862 return VMAT_ELEMENTWISE
;
1865 if (vls_type
== VLS_STORE_INVARIANT
)
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE
, vect_location
,
1869 "negative step with invariant source;"
1870 " no permute needed.\n");
1871 return VMAT_CONTIGUOUS_DOWN
;
1874 if (!perm_mask_for_reverse (vectype
))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1878 "negative step and reversing not supported.\n");
1880 return VMAT_ELEMENTWISE
;
1883 return VMAT_CONTIGUOUS_REVERSE
;
1886 /* STMT_INFO is either a masked or unconditional store. Return the value
1890 vect_get_store_rhs (stmt_vec_info stmt_info
)
1892 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1894 gcc_assert (gimple_assign_single_p (assign
));
1895 return gimple_assign_rhs1 (assign
);
1897 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1899 internal_fn ifn
= gimple_call_internal_fn (call
);
1900 int index
= internal_fn_stored_value_index (ifn
);
1901 gcc_assert (index
>= 0);
1902 return gimple_call_arg (call
, index
);
1907 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1909 This function returns a vector type which can be composed with NETLS pieces,
1910 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
1911 same vector size as the return vector. It checks target whether supports
1912 pieces-size vector mode for construction firstly, if target fails to, check
1913 pieces-size scalar mode for construction further. It returns NULL_TREE if
1914 fails to find the available composition.
1916 For example, for (vtype=V16QI, nelts=4), we can probably get:
1917 - V16QI with PTYPE V4QI.
1918 - V4SI with PTYPE SI.
1922 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
1924 gcc_assert (VECTOR_TYPE_P (vtype
));
1925 gcc_assert (known_gt (nelts
, 0U));
1927 machine_mode vmode
= TYPE_MODE (vtype
);
1928 if (!VECTOR_MODE_P (vmode
))
1931 /* When we are asked to compose the vector from its components let
1932 that happen directly. */
1933 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype
), nelts
))
1935 *ptype
= TREE_TYPE (vtype
);
1939 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
1940 unsigned int pbsize
;
1941 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
1943 /* First check if vec_init optab supports construction from
1944 vector pieces directly. */
1945 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
1946 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
1948 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
1949 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
1950 != CODE_FOR_nothing
))
1952 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
1956 /* Otherwise check if exists an integer type of the same piece size and
1957 if vec_init optab supports construction from it directly. */
1958 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
1959 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
1960 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
1961 != CODE_FOR_nothing
))
1963 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
1964 return build_vector_type (*ptype
, nelts
);
1971 /* A subroutine of get_load_store_type, with a subset of the same
1972 arguments. Handle the case where STMT_INFO is part of a grouped load
1975 For stores, the statements in the group are all consecutive
1976 and there is no gap at the end. For loads, the statements in the
1977 group might not be consecutive; there can be gaps between statements
1978 as well as at the end. */
1981 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
1982 tree vectype
, slp_tree slp_node
,
1983 bool masked_p
, vec_load_store_type vls_type
,
1984 vect_memory_access_type
*memory_access_type
,
1985 poly_int64
*poffset
,
1986 dr_alignment_support
*alignment_support_scheme
,
1988 gather_scatter_info
*gs_info
,
1989 internal_fn
*lanes_ifn
)
1991 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1992 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1993 stmt_vec_info first_stmt_info
;
1994 unsigned int group_size
;
1995 unsigned HOST_WIDE_INT gap
;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1998 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1999 group_size
= DR_GROUP_SIZE (first_stmt_info
);
2000 gap
= DR_GROUP_GAP (first_stmt_info
);
2004 first_stmt_info
= stmt_info
;
2008 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2009 bool single_element_p
= (stmt_info
== first_stmt_info
2010 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2011 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2013 /* True if the vectorized statements would access beyond the last
2014 statement in the group. */
2015 bool overrun_p
= false;
2017 /* True if we can cope with such overrun by peeling for gaps, so that
2018 there is at least one final scalar iteration after the vector loop. */
2019 bool can_overrun_p
= (!masked_p
2020 && vls_type
== VLS_LOAD
2024 /* There can only be a gap at the end of the group if the stride is
2025 known at compile time. */
2026 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2028 /* Stores can't yet have gaps. */
2029 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2039 /* Try to use consecutive accesses of as many elements as possible,
2040 separated by the stride, until we have a complete vector.
2041 Fall back to scalar accesses if that isn't possible. */
2042 *memory_access_type
= VMAT_STRIDED_SLP
;
2045 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2048 if (single_element_p
)
2049 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2050 only correct for single element "interleaving" SLP. */
2051 *memory_access_type
= get_negative_load_store_type
2052 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2055 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2056 separated by the stride, until we have a complete vector.
2057 Fall back to scalar accesses if that isn't possible. */
2058 if (multiple_p (nunits
, group_size
))
2059 *memory_access_type
= VMAT_STRIDED_SLP
;
2061 *memory_access_type
= VMAT_ELEMENTWISE
;
2064 else if (cmp
== 0 && loop_vinfo
)
2066 gcc_assert (vls_type
== VLS_LOAD
);
2067 *memory_access_type
= VMAT_INVARIANT
;
2068 /* Invariant accesses perform only component accesses, alignment
2069 is irrelevant for them. */
2070 *alignment_support_scheme
= dr_unaligned_supported
;
2073 *memory_access_type
= VMAT_CONTIGUOUS
;
2075 overrun_p
= loop_vinfo
&& gap
!= 0;
2076 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2078 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2079 "Grouped store with gaps requires"
2080 " non-consecutive accesses\n");
2083 /* An overrun is fine if the trailing elements are smaller
2084 than the alignment boundary B. Every vector access will
2085 be a multiple of B and so we are guaranteed to access a
2086 non-gap element in the same B-sized block. */
2088 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2090 / vect_get_scalar_dr_size (first_dr_info
)))
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2102 && (*memory_access_type
== VMAT_CONTIGUOUS
2103 || *memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
2104 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2105 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2109 /* If the gap splits the vector in half and the target
2110 can do half-vector operations avoid the epilogue peeling
2111 by simply loading half of the vector only. Usually
2112 the construction with an upper zero half will be elided. */
2113 dr_alignment_support alss
;
2114 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2117 unsigned HOST_WIDE_INT tem
, num
;
2120 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2121 vectype
, misalign
)))
2123 || alss
== dr_unaligned_supported
)
2124 && can_div_trunc_p (group_size
2125 * LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - gap
,
2126 nunits
, &tem
, &remain
)
2127 && (known_eq (remain
, 0u)
2128 || (constant_multiple_p (nunits
, remain
, &num
)
2129 && (vector_vector_composition_type (vectype
, num
,
2134 if (overrun_p
&& !can_overrun_p
)
2136 if (dump_enabled_p ())
2137 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2138 "Peeling for outer loop is not supported\n");
2141 /* Peeling for gaps assumes that a single scalar iteration
2142 is enough to make sure the last vector iteration doesn't
2143 access excess elements. */
2145 && (!can_div_trunc_p (group_size
2146 * LOOP_VINFO_VECT_FACTOR (loop_vinfo
) - gap
,
2147 nunits
, &tem
, &remain
)
2148 || maybe_lt (remain
+ group_size
, nunits
)))
2150 /* But peeling a single scalar iteration is enough if
2151 we can use the next power-of-two sized partial
2152 access and that is sufficiently small to be covered
2153 by the single scalar iteration. */
2154 unsigned HOST_WIDE_INT cnunits
, cvf
, cremain
, cpart_size
;
2155 if (!nunits
.is_constant (&cnunits
)
2156 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2157 || (((cremain
= group_size
* cvf
- gap
% cnunits
), true)
2158 && ((cpart_size
= (1 << ceil_log2 (cremain
))) != cnunits
)
2159 && (cremain
+ group_size
< cpart_size
2160 || vector_vector_composition_type
2161 (vectype
, cnunits
/ cpart_size
,
2162 &half_vtype
) == NULL_TREE
)))
2164 if (dump_enabled_p ())
2165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2166 "peeling for gaps insufficient for "
2172 /* If this is single-element interleaving with an element
2173 distance that leaves unused vector loads around punt - we
2174 at least create very sub-optimal code in that case (and
2175 blow up memory, see PR65518). */
2177 && *memory_access_type
== VMAT_CONTIGUOUS
2178 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2180 && maybe_gt (group_size
, TYPE_VECTOR_SUBPARTS (vectype
)))
2182 if (dump_enabled_p ())
2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2184 "single-element interleaving not supported "
2185 "for not adjacent vector loads\n");
2192 /* We can always handle this case using elementwise accesses,
2193 but see if something more efficient is available. */
2194 *memory_access_type
= VMAT_ELEMENTWISE
;
2196 /* If there is a gap at the end of the group then these optimizations
2197 would access excess elements in the last iteration. */
2198 bool would_overrun_p
= (gap
!= 0);
2199 /* An overrun is fine if the trailing elements are smaller than the
2200 alignment boundary B. Every vector access will be a multiple of B
2201 and so we are guaranteed to access a non-gap element in the
2202 same B-sized block. */
2205 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2206 / vect_get_scalar_dr_size (first_dr_info
)))
2207 would_overrun_p
= false;
2209 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2210 && (can_overrun_p
|| !would_overrun_p
)
2211 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2213 /* First cope with the degenerate case of a single-element
2215 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2220 /* Otherwise try using LOAD/STORE_LANES. */
2222 = vls_type
== VLS_LOAD
2223 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2224 : vect_store_lanes_supported (vectype
, group_size
,
2226 if (*lanes_ifn
!= IFN_LAST
)
2228 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2229 overrun_p
= would_overrun_p
;
2232 /* If that fails, try using permuting loads. */
2233 else if (vls_type
== VLS_LOAD
2234 ? vect_grouped_load_supported (vectype
,
2237 : vect_grouped_store_supported (vectype
, group_size
))
2239 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2240 overrun_p
= would_overrun_p
;
2245 /* As a last resort, trying using a gather load or scatter store.
2247 ??? Although the code can handle all group sizes correctly,
2248 it probably isn't a win to use separate strided accesses based
2249 on nearby locations. Or, even if it's a win over scalar code,
2250 it might not be a win over vectorizing at a lower VF, if that
2251 allows us to use contiguous accesses. */
2252 if (*memory_access_type
== VMAT_ELEMENTWISE
2255 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2257 *memory_access_type
= VMAT_GATHER_SCATTER
;
2260 if (*memory_access_type
== VMAT_GATHER_SCATTER
2261 || *memory_access_type
== VMAT_ELEMENTWISE
)
2263 *alignment_support_scheme
= dr_unaligned_supported
;
2264 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2268 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2269 *alignment_support_scheme
2270 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2274 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2276 /* STMT is the leader of the group. Check the operands of all the
2277 stmts of the group. */
2278 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2279 while (next_stmt_info
)
2281 tree op
= vect_get_store_rhs (next_stmt_info
);
2282 enum vect_def_type dt
;
2283 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2287 "use not simple.\n");
2290 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2296 gcc_assert (can_overrun_p
);
2297 if (dump_enabled_p ())
2298 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2299 "Data access with gaps requires scalar "
2301 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2307 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2308 if there is a memory access type that the vectorized form can use,
2309 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2310 or scatters, fill in GS_INFO accordingly. In addition
2311 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2312 the target does not support the alignment scheme. *MISALIGNMENT
2313 is set according to the alignment of the access (including
2314 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2316 SLP says whether we're performing SLP rather than loop vectorization.
2317 MASKED_P is true if the statement is conditional on a vectorized mask.
2318 VECTYPE is the vector type that the vectorized statements will use.
2319 NCOPIES is the number of vector statements that will be needed. */
2322 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2323 tree vectype
, slp_tree slp_node
,
2324 bool masked_p
, vec_load_store_type vls_type
,
2325 unsigned int ncopies
,
2326 vect_memory_access_type
*memory_access_type
,
2327 poly_int64
*poffset
,
2328 dr_alignment_support
*alignment_support_scheme
,
2330 gather_scatter_info
*gs_info
,
2331 internal_fn
*lanes_ifn
)
2333 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2334 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2335 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2337 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2339 *memory_access_type
= VMAT_GATHER_SCATTER
;
2340 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2342 /* When using internal functions, we rely on pattern recognition
2343 to convert the type of the offset to the type that the target
2344 requires, with the result being a call to an internal function.
2345 If that failed for some reason (e.g. because another pattern
2346 took priority), just handle cases in which the offset already
2347 has the right type. */
2348 else if (gs_info
->ifn
!= IFN_LAST
2349 && !is_gimple_call (stmt_info
->stmt
)
2350 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2351 TREE_TYPE (gs_info
->offset_vectype
)))
2353 if (dump_enabled_p ())
2354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2355 "%s offset requires a conversion\n",
2356 vls_type
== VLS_LOAD
? "gather" : "scatter");
2359 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2360 &gs_info
->offset_dt
,
2361 &gs_info
->offset_vectype
))
2363 if (dump_enabled_p ())
2364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2365 "%s index use not simple.\n",
2366 vls_type
== VLS_LOAD
? "gather" : "scatter");
2369 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2371 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2372 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2373 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2374 (gs_info
->offset_vectype
),
2375 TYPE_VECTOR_SUBPARTS (vectype
)))
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2379 "unsupported vector types for emulated "
2384 /* Gather-scatter accesses perform only component accesses, alignment
2385 is irrelevant for them. */
2386 *alignment_support_scheme
= dr_unaligned_supported
;
2388 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) || slp_node
)
2390 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2392 vls_type
, memory_access_type
, poffset
,
2393 alignment_support_scheme
,
2394 misalignment
, gs_info
, lanes_ifn
))
2397 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2399 gcc_assert (!slp_node
);
2401 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2403 *memory_access_type
= VMAT_GATHER_SCATTER
;
2405 *memory_access_type
= VMAT_ELEMENTWISE
;
2406 /* Alignment is irrelevant here. */
2407 *alignment_support_scheme
= dr_unaligned_supported
;
2411 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2414 gcc_assert (vls_type
== VLS_LOAD
);
2415 *memory_access_type
= VMAT_INVARIANT
;
2416 /* Invariant accesses perform only component accesses, alignment
2417 is irrelevant for them. */
2418 *alignment_support_scheme
= dr_unaligned_supported
;
2423 *memory_access_type
= get_negative_load_store_type
2424 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2426 *memory_access_type
= VMAT_CONTIGUOUS
;
2427 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2429 *alignment_support_scheme
2430 = vect_supportable_dr_alignment (vinfo
,
2431 STMT_VINFO_DR_INFO (stmt_info
),
2432 vectype
, *misalignment
);
2436 if ((*memory_access_type
== VMAT_ELEMENTWISE
2437 || *memory_access_type
== VMAT_STRIDED_SLP
)
2438 && !nunits
.is_constant ())
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2442 "Not using elementwise accesses due to variable "
2443 "vectorization factor.\n");
2447 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2451 "unsupported unaligned access\n");
2455 /* FIXME: At the moment the cost model seems to underestimate the
2456 cost of using elementwise accesses. This check preserves the
2457 traditional behavior until that can be fixed. */
2458 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2459 if (!first_stmt_info
)
2460 first_stmt_info
= stmt_info
;
2461 if (*memory_access_type
== VMAT_ELEMENTWISE
2462 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2463 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2464 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2465 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2469 "not falling back to elementwise accesses\n");
2475 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2476 conditional operation STMT_INFO. When returning true, store the mask
2477 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2478 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2479 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2482 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2483 slp_tree slp_node
, unsigned mask_index
,
2484 tree
*mask
, slp_tree
*mask_node
,
2485 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2487 enum vect_def_type mask_dt
;
2489 slp_tree mask_node_1
;
2490 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2491 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2495 "mask use not simple.\n");
2499 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2503 "mask argument is not a boolean.\n");
2507 /* If the caller is not prepared for adjusting an external/constant
2508 SLP mask vector type fail. */
2511 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2513 if (dump_enabled_p ())
2514 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2515 "SLP mask argument is not vectorized.\n");
2519 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2521 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
),
2524 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2528 "could not find an appropriate vector mask type.\n");
2532 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2533 TYPE_VECTOR_SUBPARTS (vectype
)))
2535 if (dump_enabled_p ())
2536 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2537 "vector mask type %T"
2538 " does not match vector data type %T.\n",
2539 mask_vectype
, vectype
);
2544 *mask_dt_out
= mask_dt
;
2545 *mask_vectype_out
= mask_vectype
;
2547 *mask_node
= mask_node_1
;
2551 /* Return true if stored value is suitable for vectorizing store
2552 statement STMT_INFO. When returning true, store the scalar stored
2553 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2554 the type of the vectorized store value in
2555 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2558 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2559 slp_tree slp_node
, tree
*rhs
, slp_tree
*rhs_node
,
2560 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2561 vec_load_store_type
*vls_type_out
)
2564 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2566 if (gimple_call_internal_p (call
)
2567 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2568 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2571 op_no
= vect_slp_child_index_for_operand
2572 (stmt_info
->stmt
, op_no
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
2574 enum vect_def_type rhs_dt
;
2576 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2577 rhs
, rhs_node
, &rhs_dt
, &rhs_vectype
))
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2581 "use not simple.\n");
2585 /* In the case this is a store from a constant make sure
2586 native_encode_expr can handle it. */
2587 if (rhs_dt
== vect_constant_def
2588 && CONSTANT_CLASS_P (*rhs
) && native_encode_expr (*rhs
, NULL
, 64) == 0)
2590 if (dump_enabled_p ())
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2592 "cannot encode constant as a byte sequence.\n");
2596 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2597 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2599 if (dump_enabled_p ())
2600 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2601 "incompatible vector types.\n");
2605 *rhs_dt_out
= rhs_dt
;
2606 *rhs_vectype_out
= rhs_vectype
;
2607 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2608 *vls_type_out
= VLS_STORE_INVARIANT
;
2610 *vls_type_out
= VLS_STORE
;
2614 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2615 Note that we support masks with floating-point type, in which case the
2616 floats are interpreted as a bitmask. */
2619 vect_build_all_ones_mask (vec_info
*vinfo
,
2620 stmt_vec_info stmt_info
, tree masktype
)
2622 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2623 return build_int_cst (masktype
, -1);
2624 else if (VECTOR_BOOLEAN_TYPE_P (masktype
)
2625 || TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2627 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2628 mask
= build_vector_from_val (masktype
, mask
);
2629 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2631 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2635 for (int j
= 0; j
< 6; ++j
)
2637 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2638 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2639 mask
= build_vector_from_val (masktype
, mask
);
2640 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2645 /* Build an all-zero merge value of type VECTYPE while vectorizing
2646 STMT_INFO as a gather load. */
2649 vect_build_zero_merge_argument (vec_info
*vinfo
,
2650 stmt_vec_info stmt_info
, tree vectype
)
2653 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2654 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2655 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2659 for (int j
= 0; j
< 6; ++j
)
2661 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2662 merge
= build_real (TREE_TYPE (vectype
), r
);
2666 merge
= build_vector_from_val (vectype
, merge
);
2667 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2670 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2671 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2672 the gather load operation. If the load is conditional, MASK is the
2673 vectorized condition, otherwise MASK is null. PTR is the base
2674 pointer and OFFSET is the vectorized offset. */
2677 vect_build_one_gather_load_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2678 gimple_stmt_iterator
*gsi
,
2679 gather_scatter_info
*gs_info
,
2680 tree ptr
, tree offset
, tree mask
)
2682 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2683 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2684 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2685 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2686 /* ptrtype */ arglist
= TREE_CHAIN (arglist
);
2687 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2688 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2689 tree scaletype
= TREE_VALUE (arglist
);
2691 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2693 || TREE_CODE (masktype
) == INTEGER_TYPE
2694 || types_compatible_p (srctype
, masktype
)));
2697 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2699 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2700 TYPE_VECTOR_SUBPARTS (idxtype
)));
2701 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2702 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2703 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2704 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2708 tree src_op
= NULL_TREE
;
2709 tree mask_op
= NULL_TREE
;
2712 if (!useless_type_conversion_p (masktype
, TREE_TYPE (mask
)))
2714 tree utype
, optype
= TREE_TYPE (mask
);
2715 if (VECTOR_TYPE_P (masktype
)
2716 || TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2719 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2720 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2721 tree mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask
);
2723 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2724 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2726 if (!useless_type_conversion_p (masktype
, utype
))
2728 gcc_assert (TYPE_PRECISION (utype
)
2729 <= TYPE_PRECISION (masktype
));
2730 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2731 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2732 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2735 src_op
= build_zero_cst (srctype
);
2746 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2747 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2750 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2751 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2754 if (!useless_type_conversion_p (vectype
, rettype
))
2756 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2757 TYPE_VECTOR_SUBPARTS (rettype
)));
2758 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2759 gimple_call_set_lhs (new_stmt
, op
);
2760 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2761 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2762 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
, op
);
2768 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2769 instructions before GSI. GS_INFO describes the scatter store operation.
2770 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2771 vectorized data to store.
2772 If the store is conditional, MASK is the vectorized condition, otherwise
2776 vect_build_one_scatter_store_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2777 gimple_stmt_iterator
*gsi
,
2778 gather_scatter_info
*gs_info
,
2779 tree ptr
, tree offset
, tree oprnd
, tree mask
)
2781 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2782 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2783 /* tree ptrtype = TREE_VALUE (arglist); */ arglist
= TREE_CHAIN (arglist
);
2784 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2785 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2786 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2787 tree scaletype
= TREE_VALUE (arglist
);
2788 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
2789 && TREE_CODE (rettype
) == VOID_TYPE
);
2791 tree mask_arg
= NULL_TREE
;
2795 tree optype
= TREE_TYPE (mask_arg
);
2797 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2800 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2801 tree var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2802 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
2804 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2805 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2807 if (!useless_type_conversion_p (masktype
, utype
))
2809 gcc_assert (TYPE_PRECISION (utype
) <= TYPE_PRECISION (masktype
));
2810 tree var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2811 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2812 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2818 mask_arg
= build_int_cst (masktype
, -1);
2819 mask_arg
= vect_init_vector (vinfo
, stmt_info
, mask_arg
, masktype
, NULL
);
2823 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
2825 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
2826 TYPE_VECTOR_SUBPARTS (srctype
)));
2827 tree var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
2828 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
2829 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
2830 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2835 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2837 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2838 TYPE_VECTOR_SUBPARTS (idxtype
)));
2839 tree var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2840 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2841 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2842 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2846 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2848 = gimple_build_call (gs_info
->decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
2852 /* Prepare the base and offset in GS_INFO for vectorization.
2853 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2854 to the vectorized offset argument for the first copy of STMT_INFO.
2855 STMT_INFO is the statement described by GS_INFO and LOOP is the
2859 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2860 class loop
*loop
, stmt_vec_info stmt_info
,
2861 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2862 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
2864 gimple_seq stmts
= NULL
;
2865 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2869 edge pe
= loop_preheader_edge (loop
);
2870 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2871 gcc_assert (!new_bb
);
2874 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
2878 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2879 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2880 gs_info
->offset
, vec_offset
,
2881 gs_info
->offset_vectype
);
2885 /* Prepare to implement a grouped or strided load or store using
2886 the gather load or scatter store operation described by GS_INFO.
2887 STMT_INFO is the load or store statement.
2889 Set *DATAREF_BUMP to the amount that should be added to the base
2890 address after each copy of the vectorized statement. Set *VEC_OFFSET
2891 to an invariant offset vector in which element I has the value
2892 I * DR_STEP / SCALE. */
2895 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2896 loop_vec_info loop_vinfo
,
2897 gimple_stmt_iterator
*gsi
,
2898 gather_scatter_info
*gs_info
,
2899 tree
*dataref_bump
, tree
*vec_offset
,
2900 vec_loop_lens
*loop_lens
)
2902 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2903 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2905 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2907 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2908 ivtmp_8 = _31 * 16 (step in bytes);
2909 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2910 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2912 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, vectype
, 0, 0);
2914 = fold_build2 (MULT_EXPR
, sizetype
,
2915 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2917 *dataref_bump
= force_gimple_operand_gsi (gsi
, tmp
, true, NULL_TREE
, true,
2923 = size_binop (MULT_EXPR
,
2924 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2925 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2926 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
2929 /* The offset given in GS_INFO can have pointer type, so use the element
2930 type of the vector instead. */
2931 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2933 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2934 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2935 ssize_int (gs_info
->scale
));
2936 step
= fold_convert (offset_type
, step
);
2938 /* Create {0, X, X*2, X*3, ...}. */
2939 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2940 build_zero_cst (offset_type
), step
);
2941 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
2944 /* Prepare the pointer IVs which needs to be updated by a variable amount.
2945 Such variable amount is the outcome of .SELECT_VL. In this case, we can
2946 allow each iteration process the flexible number of elements as long as
2947 the number <= vf elments.
2949 Return data reference according to SELECT_VL.
2950 If new statements are needed, insert them before GSI. */
2953 vect_get_loop_variant_data_ptr_increment (
2954 vec_info
*vinfo
, tree aggr_type
, gimple_stmt_iterator
*gsi
,
2955 vec_loop_lens
*loop_lens
, dr_vec_info
*dr_info
,
2956 vect_memory_access_type memory_access_type
)
2958 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2959 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
2961 /* gather/scatter never reach here. */
2962 gcc_assert (memory_access_type
!= VMAT_GATHER_SCATTER
);
2964 /* When we support SELECT_VL pattern, we dynamic adjust
2965 the memory address by .SELECT_VL result.
2967 The result of .SELECT_VL is the number of elements to
2968 be processed of each iteration. So the memory address
2969 adjustment operation should be:
2971 addr = addr + .SELECT_VL (ARG..) * step;
2974 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, aggr_type
, 0, 0);
2975 tree len_type
= TREE_TYPE (loop_len
);
2976 /* Since the outcome of .SELECT_VL is element size, we should adjust
2977 it into bytesize so that it can be used in address pointer variable
2978 amount IVs adjustment. */
2979 tree tmp
= fold_build2 (MULT_EXPR
, len_type
, loop_len
,
2980 wide_int_to_tree (len_type
, wi::to_widest (step
)));
2981 tree bump
= make_temp_ssa_name (len_type
, NULL
, "ivtmp");
2982 gassign
*assign
= gimple_build_assign (bump
, tmp
);
2983 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
2987 /* Return the amount that should be added to a vector pointer to move
2988 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2989 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2993 vect_get_data_ptr_increment (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
2994 dr_vec_info
*dr_info
, tree aggr_type
,
2995 vect_memory_access_type memory_access_type
,
2996 vec_loop_lens
*loop_lens
= nullptr)
2998 if (memory_access_type
== VMAT_INVARIANT
)
2999 return size_zero_node
;
3001 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3002 if (loop_vinfo
&& LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
3003 return vect_get_loop_variant_data_ptr_increment (vinfo
, aggr_type
, gsi
,
3005 memory_access_type
);
3007 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3008 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3009 if (tree_int_cst_sgn (step
) == -1)
3010 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3014 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
/* NOTE(review): per the visible code this vectorizes a bswap call by
   viewing the input vector as a same-sized vector of chars, applying a
   byte-reversing VEC_PERM_EXPR within each WORD_BYTES-sized group, and
   viewing the result back as VECTYPE.  This rendering is line-shredded
   and several original lines (braces, early returns, the !vec_stmt
   guard) are elided, so statement boundaries follow the embedded
   original line numbers.  */
3017 vectorizable_bswap (vec_info
*vinfo
,
3018 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3019 gimple
**vec_stmt
, slp_tree slp_node
,
3021 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3024 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3025 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3028 op
= gimple_call_arg (stmt
, 0);
3029 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3030 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3032 /* Multiple types in SLP are handled by creating the appropriate number of
3033 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3038 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3040 gcc_assert (ncopies
>= 1);
/* NOTE(review): input and output vector types must occupy the same number
   of bytes for the char-vector reinterpretation below to be valid; the
   early-return body of this check is elided in this rendering.  */
3042 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype
))
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3046 "mismatched vector sizes %T and %T\n",
3047 vectype_in
, vectype
);
3051 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3055 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3056 unsigned word_bytes
;
3057 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3060 /* The encoding uses one stepped pattern for each byte in the word. */
/* NOTE(review): elts pushes (i + 1) * word_bytes - j - 1, i.e. the byte
   indices of each word in reverse order — the byte-swap permutation.  */
3061 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3062 for (unsigned i
= 0; i
< 3; ++i
)
3063 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3064 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3066 vec_perm_indices
indices (elts
, 1, num_bytes
);
3067 machine_mode vmode
= TYPE_MODE (char_vectype
);
/* Bail out unless the target can do this constant permutation; the
   early-return body is elided in this rendering.  */
3068 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3074 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3076 if (dump_enabled_p ())
3077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3078 "incompatible vector types for invariants\n");
/* NOTE(review): analysis phase — record the stmt kind and the costs
   (one prologue vector_stmt plus one vec_perm per generated copy).
   The enclosing !vec_stmt guard is elided here; confirm against
   upstream tree-vect-stmts.cc.  */
3082 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3083 DUMP_VECT_SCOPE ("vectorizable_bswap");
3084 record_stmt_cost (cost_vec
,
3085 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3086 record_stmt_cost (cost_vec
,
3088 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3089 vec_perm
, stmt_info
, 0, vect_body
);
/* Transform phase: materialize the permutation as a VECTOR_CST.  */
3093 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3096 vec
<tree
> vec_oprnds
= vNULL
;
3097 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3099 /* Arguments are ready. create the new vector stmt. */
/* Per vector operand: VIEW_CONVERT to the char vector, VEC_PERM with the
   byte-reversing constant, VIEW_CONVERT back to VECTYPE.  */
3102 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3105 tree tem
= make_ssa_name (char_vectype
);
3106 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3107 char_vectype
, vop
));
3108 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3109 tree tem2
= make_ssa_name (char_vectype
);
3110 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3111 tem
, tem
, bswap_vconst
);
3112 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3113 tem
= make_ssa_name (vectype
);
3114 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3116 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3118 slp_node
->push_vec_def (new_stmt
);
3120 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3124 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3126 vec_oprnds
.release ();
/* NOTE(review): line-shredded rendering — the tail of the header comment,
   the early "return false" lines and the final "return true" of this
   predicate are elided; the visible logic checks that both vector types
   have integral elements and that supportable_narrowing_operation can do
   the NOP conversion in a single step (multi_step_cvt presumably must be
   0 — confirm against upstream), storing the pack code in *CONVERT_CODE.  */
3130 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3131 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3132 in a single step. On success, store the binary pack code in
3136 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3137 code_helper
*convert_code
)
3139 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3140 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3144 int multi_step_cvt
= 0;
3145 auto_vec
<tree
, 8> interm_types
;
3146 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3147 &code
, &multi_step_cvt
, &interm_types
)
3151 *convert_code
= code
;
3155 /* Function vectorizable_call.
3157 Check if STMT_INFO performs a function call that can be vectorized.
3158 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3159 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3160 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): this chunk is a line-shredded rendering of the function;
   many original lines (braces, early returns, some assignments) are
   elided, so the comments below describe only what the visible code
   demonstrates and hedge everything else.  */
3163 vectorizable_call (vec_info
*vinfo
,
3164 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3165 gimple
**vec_stmt
, slp_tree slp_node
,
3166 stmt_vector_for_cost
*cost_vec
)
3172 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3173 tree vectype_out
, vectype_in
;
3174 poly_uint64 nunits_in
;
3175 poly_uint64 nunits_out
;
3176 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3177 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3178 tree fndecl
, new_temp
, rhs_type
;
/* Per-argument def kinds / vector types / SLP operands; at most 4 call
   arguments are handled (checked below).  */
3179 enum vect_def_type dt
[4]
3180 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3181 vect_unknown_def_type
};
3182 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3183 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3184 int ndts
= ARRAY_SIZE (dt
);
3186 auto_vec
<tree
, 8> vargs
;
3187 enum { NARROW
, NONE
, WIDEN
} modifier
;
3190 tree clz_ctz_arg1
= NULL_TREE
;
/* Relevance gate (early-return bodies elided in this rendering).  */
3192 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3195 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3199 /* Is STMT_INFO a vectorizable call? */
3200 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3204 if (gimple_call_internal_p (stmt
)
3205 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3206 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3207 /* Handled by vectorizable_load and vectorizable_store. */
3210 if (gimple_call_lhs (stmt
) == NULL_TREE
3211 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3214 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3216 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3218 /* Process function arguments. */
3219 rhs_type
= NULL_TREE
;
3220 vectype_in
= NULL_TREE
;
3221 nargs
= gimple_call_num_args (stmt
);
3223 /* Bail out if the function has more than four arguments, we do not have
3224 interesting builtin functions to vectorize with more than two arguments
3225 except for fma. No arguments is also not good. */
3226 if (nargs
== 0 || nargs
> 4)
3229 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3230 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3231 if (cfn
== CFN_GOMP_SIMD_LANE
)
3234 rhs_type
= unsigned_type_node
;
3236 /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
3237 argument just says whether it is well-defined at zero or not and what
3238 value should be returned for it. */
3239 if ((cfn
== CFN_CLZ
|| cfn
== CFN_CTZ
) && nargs
== 2)
3242 clz_ctz_arg1
= gimple_call_arg (stmt
, 1);
/* mask_opno < 0 means no mask operand; for internal fns ask the IFN
   machinery which argument (if any) is the mask.  */
3246 if (internal_fn_p (cfn
))
3247 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
/* Validate each argument: the mask operand via vect_check_scalar_mask,
   everything else via vect_is_simple_use; collect dt/slp_op/vectypes.  */
3249 for (i
= 0; i
< nargs
; i
++)
3251 if ((int) i
== mask_opno
)
3253 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3254 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3259 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3260 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3262 if (dump_enabled_p ())
3263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3264 "use not simple.\n");
3268 /* We can only handle calls with arguments of the same type. */
3270 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3272 if (dump_enabled_p ())
3273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3274 "argument types differ.\n");
3278 rhs_type
= TREE_TYPE (op
);
3281 vectype_in
= vectypes
[i
];
3282 else if (vectypes
[i
]
3283 && !types_compatible_p (vectypes
[i
], vectype_in
))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3287 "argument vector types differ.\n");
3291 /* If all arguments are external or constant defs, infer the vector type
3292 from the scalar type. */
3294 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3296 gcc_assert (vectype_in
);
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3301 "no vectype for scalar type %T\n", rhs_type
);
/* Mask (boolean) vectors and data vectors must not be mixed between the
   call's input and output.  */
3306 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3307 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3311 "mixed mask and nonmask vector types\n");
3315 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3319 "use emulated vector type for call\n");
/* Classify the call as WIDEN / NONE / NARROW by comparing input and output
   lane counts (the assignments to MODIFIER are elided in this rendering —
   confirm against upstream).  */
3324 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3325 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3326 if (known_eq (nunits_in
* 2, nunits_out
))
3328 else if (known_eq (nunits_out
, nunits_in
))
3330 else if (known_eq (nunits_out
* 2, nunits_in
))
3335 /* We only handle functions that do not read or clobber memory. */
3336 if (gimple_vuse (stmt
))
3338 if (dump_enabled_p ())
3339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3340 "function reads from or writes to memory.\n");
3344 /* For now, we only vectorize functions if a target specific builtin
3345 is available. TODO -- in some cases, it might be profitable to
3346 insert the calls for pieces of the vector, in order to be able
3347 to vectorize other operations in the loop. */
3349 internal_fn ifn
= IFN_LAST
;
3350 tree callee
= gimple_call_fndecl (stmt
);
3352 /* First try using an internal function. */
3353 code_helper convert_code
= MAX_TREE_CODES
;
3355 && (modifier
== NONE
3356 || (modifier
== NARROW
3357 && simple_integer_narrowing (vectype_out
, vectype_in
,
3359 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3362 /* If that fails, try asking for a target-specific built-in function. */
3363 if (ifn
== IFN_LAST
)
3365 if (cfn
!= CFN_LAST
)
3366 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3367 (cfn
, vectype_out
, vectype_in
);
3368 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3369 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3370 (callee
, vectype_out
, vectype_in
);
/* Neither an internal fn nor a target builtin: only two special cases
   remain — GOMP_SIMD_LANE and the bswap builtins.  */
3373 if (ifn
== IFN_LAST
&& !fndecl
)
3375 if (cfn
== CFN_GOMP_SIMD_LANE
3378 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3379 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3380 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3381 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3383 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3384 { 0, 1, 2, ... vf - 1 } vector. */
3385 gcc_assert (nargs
== 0);
3387 else if (modifier
== NONE
3388 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3389 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3390 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3391 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3392 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3393 slp_op
, vectype_in
, cost_vec
);
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3398 "function is not vectorizable.\n");
/* NARROW without an internal fn needs a copy count based on the (wider)
   output type; otherwise base it on the input type.  */
3405 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3406 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3408 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3410 /* Sanity check: make sure that at least one copy of the vectorized stmt
3411 needs to be generated. */
3412 gcc_assert (ncopies
>= 1);
/* Look up conditional (COND_*) and length (COND_LEN_*) variants of the
   internal fn, plus the loop's mask/len bookkeeping, for partial-vector
   (fully-masked / length-controlled) loops.  */
3414 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3415 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3416 internal_fn cond_len_fn
= get_len_internal_fn (ifn
);
3417 int len_opno
= internal_fn_len_index (cond_len_fn
);
3418 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3419 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
3420 if (!vec_stmt
) /* transformation not required. */
3423 for (i
= 0; i
< nargs
; ++i
)
3424 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3426 ? vectypes
[i
] : vectype_in
))
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3430 "incompatible vector types for invariants\n");
3433 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3434 DUMP_VECT_SCOPE ("vectorizable_call");
3435 vect_model_simple_cost (vinfo
, stmt_info
,
3436 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3437 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3438 record_stmt_cost (cost_vec
, ncopies
/ 2,
3439 vec_promote_demote
, stmt_info
, 0, vect_body
);
/* If partial vectors would be needed (mask operand or in-reduction) but
   neither a conditional nor a length variant is supported, disable
   partial vectors for this loop.  */
3442 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3443 && (reduc_idx
>= 0 || mask_opno
>= 0))
3446 && (cond_fn
== IFN_LAST
3447 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3448 OPTIMIZE_FOR_SPEED
))
3449 && (cond_len_fn
== IFN_LAST
3450 || !direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3451 OPTIMIZE_FOR_SPEED
)))
3453 if (dump_enabled_p ())
3454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3455 "can't use a fully-masked loop because no"
3456 " conditional operation is available.\n");
3457 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3461 unsigned int nvectors
3463 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3465 tree scalar_mask
= NULL_TREE
;
3467 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3468 if (cond_len_fn
!= IFN_LAST
3469 && direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3470 OPTIMIZE_FOR_SPEED
))
3471 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype_out
,
3474 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
,
3483 if (dump_enabled_p ())
3484 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n")
;
/* Transform phase starts here.  */
3487 scalar_dest
= gimple_call_lhs (stmt
);
3488 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3490 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3491 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
3492 unsigned int vect_nargs
= nargs
;
3498 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3501 else if (reduc_idx
>= 0)
3504 else if (masked_loop_p
&& reduc_idx
>= 0)
/* Case 1: no conversion needed, or an internal fn that handles the
   narrowing itself.  Build one vector call per copy / SLP def.  */
3512 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3514 tree prev_res
= NULL_TREE
;
3515 vargs
.safe_grow (vect_nargs
, true);
3516 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3517 for (j
= 0; j
< ncopies
; ++j
)
3519 /* Build argument list for the vectorized call. */
3522 vec
<tree
> vec_oprnds0
;
3524 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3525 vec_oprnds0
= vec_defs
[0];
3527 /* Arguments are ready. Create the new vector stmt. */
3528 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3531 if (masked_loop_p
&& reduc_idx
>= 0)
3533 unsigned int vec_num
= vec_oprnds0
.length ();
3534 /* Always true for SLP. */
3535 gcc_assert (ncopies
== 1);
3536 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3537 gsi
, masks
, vec_num
,
3541 for (k
= 0; k
< nargs
; k
++)
3543 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3544 vargs
[varg
++] = vec_oprndsk
[i
];
3546 if (masked_loop_p
&& reduc_idx
>= 0)
3547 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3549 vargs
[varg
++] = clz_ctz_arg1
;
3552 if (modifier
== NARROW
)
3554 /* We don't define any narrowing conditional functions
3556 gcc_assert (mask_opno
< 0);
3557 tree half_res
= make_ssa_name (vectype_in
);
3559 = gimple_build_call_internal_vec (ifn
, vargs
);
3560 gimple_call_set_lhs (call
, half_res
);
3561 gimple_call_set_nothrow (call
, true);
3562 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3565 prev_res
= half_res
;
3568 new_temp
= make_ssa_name (vec_dest
);
3569 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3570 prev_res
, half_res
);
3571 vect_finish_stmt_generation (vinfo
, stmt_info
,
3576 if (len_opno
>= 0 && len_loop_p
)
3578 unsigned int vec_num
= vec_oprnds0
.length ();
3579 /* Always true for SLP. */
3580 gcc_assert (ncopies
== 1);
3582 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3585 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3586 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3587 vargs
[len_opno
] = len
;
3588 vargs
[len_opno
+ 1] = bias
;
3590 else if (mask_opno
>= 0 && masked_loop_p
)
3592 unsigned int vec_num
= vec_oprnds0
.length ();
3593 /* Always true for SLP. */
3594 gcc_assert (ncopies
== 1);
3595 tree mask
= vect_get_loop_mask (loop_vinfo
,
3596 gsi
, masks
, vec_num
,
3598 vargs
[mask_opno
] = prepare_vec_mask
3599 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3600 vargs
[mask_opno
], gsi
);
3604 if (ifn
!= IFN_LAST
)
3605 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3607 call
= gimple_build_call_vec (fndecl
, vargs
);
3608 new_temp
= make_ssa_name (vec_dest
, call
);
3609 gimple_call_set_lhs (call
, new_temp
);
3610 gimple_call_set_nothrow (call
, true);
3611 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3614 slp_node
->push_vec_def (new_stmt
);
/* Non-SLP variant of the same argument assembly (elided branch
   structure — the enclosing else is not visible in this rendering).  */
3620 if (masked_loop_p
&& reduc_idx
>= 0)
3621 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3623 for (i
= 0; i
< nargs
; i
++)
3625 op
= gimple_call_arg (stmt
, i
);
3628 vec_defs
.quick_push (vNULL
);
3629 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3633 vargs
[varg
++] = vec_defs
[i
][j
];
3635 if (masked_loop_p
&& reduc_idx
>= 0)
3636 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3638 vargs
[varg
++] = clz_ctz_arg1
;
3640 if (len_opno
>= 0 && len_loop_p
)
3642 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3645 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3646 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3647 vargs
[len_opno
] = len
;
3648 vargs
[len_opno
+ 1] = bias
;
3650 else if (mask_opno
>= 0 && masked_loop_p
)
3652 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3655 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3656 vargs
[mask_opno
], gsi
);
/* GOMP_SIMD_LANE: emit a { 0, 1, ... } index vector instead of a call.  */
3660 if (cfn
== CFN_GOMP_SIMD_LANE
)
3662 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3664 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3665 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3666 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3667 new_temp
= make_ssa_name (vec_dest
);
3668 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3669 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3671 else if (modifier
== NARROW
)
3673 /* We don't define any narrowing conditional functions at
3675 gcc_assert (mask_opno
< 0);
3676 tree half_res
= make_ssa_name (vectype_in
);
3677 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3678 gimple_call_set_lhs (call
, half_res
);
3679 gimple_call_set_nothrow (call
, true);
3680 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3683 prev_res
= half_res
;
3686 new_temp
= make_ssa_name (vec_dest
);
3687 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
3689 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3694 if (ifn
!= IFN_LAST
)
3695 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3697 call
= gimple_build_call_vec (fndecl
, vargs
);
3698 new_temp
= make_ssa_name (vec_dest
, call
);
3699 gimple_call_set_lhs (call
, new_temp
);
3700 gimple_call_set_nothrow (call
, true);
3701 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3705 if (j
== (modifier
== NARROW
? 1 : 0))
3706 *vec_stmt
= new_stmt
;
3707 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3709 for (i
= 0; i
< nargs
; i
++)
3711 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3712 vec_oprndsi
.release ();
/* Case 2: NARROW via a target builtin — each vector call consumes two
   input vector defs (arguments pushed in pairs below).  */
3715 else if (modifier
== NARROW
)
3717 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3718 /* We don't define any narrowing conditional functions at present. */
3719 gcc_assert (mask_opno
< 0);
3720 for (j
= 0; j
< ncopies
; ++j
)
3722 /* Build argument list for the vectorized call. */
3724 vargs
.create (nargs
* 2);
3730 vec
<tree
> vec_oprnds0
;
3732 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3733 vec_oprnds0
= vec_defs
[0];
3735 /* Arguments are ready. Create the new vector stmt. */
3736 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3740 for (k
= 0; k
< nargs
; k
++)
3742 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3743 vargs
.quick_push (vec_oprndsk
[i
]);
3744 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3747 if (ifn
!= IFN_LAST
)
3748 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3750 call
= gimple_build_call_vec (fndecl
, vargs
);
3751 new_temp
= make_ssa_name (vec_dest
, call
);
3752 gimple_call_set_lhs (call
, new_temp
);
3753 gimple_call_set_nothrow (call
, true);
3754 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3755 slp_node
->push_vec_def (call
);
/* Non-SLP: fetch 2 * ncopies defs per operand and consume them two at
   a time per generated call.  */
3760 for (i
= 0; i
< nargs
; i
++)
3762 op
= gimple_call_arg (stmt
, i
);
3765 vec_defs
.quick_push (vNULL
);
3766 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3767 op
, &vec_defs
[i
], vectypes
[i
]);
3769 vec_oprnd0
= vec_defs
[i
][2*j
];
3770 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3772 vargs
.quick_push (vec_oprnd0
);
3773 vargs
.quick_push (vec_oprnd1
);
3776 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3777 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3778 gimple_call_set_lhs (new_stmt
, new_temp
);
3779 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3781 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3785 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3787 for (i
= 0; i
< nargs
; i
++)
3789 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3790 vec_oprndsi
.release ();
3794 /* No current target implements this case. */
3799 /* The call in STMT might prevent it from being removed in dce.
3800 We however cannot remove it here, due to the way the ssa name
3801 it defines is mapped to the new definition. So just replace
3802 rhs of the statement with something harmless. */
3807 stmt_info
= vect_orig_stmt (stmt_info
);
3808 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3811 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3812 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
/* Per-argument bookkeeping for vectorizable_simd_clone_call.  Several
   fields of the original struct (presumably including the operand tree
   and its vector type/alignment — confirm against upstream) are elided
   in this rendering; only the three below are visible.  */
3818 struct simd_call_arg_info
/* Constant step of a linear argument; 0 when the argument is not known
   to be linear (see vect_simd_lane_linear and the simple_iv analysis in
   the caller).  */
3822 HOST_WIDE_INT linear_step
;
/* Vectorizer def kind of the argument (constant/external/internal...).  */
3823 enum vect_def_type dt
;
/* True when the argument is linear within a simd lane only, as detected
   by vect_simd_lane_linear below.  */
3825 bool simd_lane_linear
;
/* NOTE(review): line-shredded rendering — the tail of the header comment,
   the switch case labels, early returns and closing braces are elided.
   The visible code requires OP to be defined by a POINTER_PLUS_EXPR with
   an invariant base and then walks the SSA chain defining the offset.  */
3828 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3829 is linear within simd lane (but not within whole loop), note it in
3833 vect_simd_lane_linear (tree op
, class loop
*loop
,
3834 struct simd_call_arg_info
*arginfo
)
3836 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3838 if (!is_gimple_assign (def_stmt
)
3839 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3840 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3843 tree base
= gimple_assign_rhs1 (def_stmt
);
3844 HOST_WIDE_INT linear_step
= 0;
3845 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Follow the SSA chain defining the offset: fold constant additions into
   BASE, record at most one shwi-sized step into LINEAR_STEP, and allow
   only non-truncating integer conversions (case labels for each branch
   are elided in this rendering — confirm against upstream).  */
3846 while (TREE_CODE (v
) == SSA_NAME
)
3849 def_stmt
= SSA_NAME_DEF_STMT (v
);
3850 if (is_gimple_assign (def_stmt
))
3851 switch (gimple_assign_rhs_code (def_stmt
))
3854 t
= gimple_assign_rhs2 (def_stmt
);
3855 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3857 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3858 v
= gimple_assign_rhs1 (def_stmt
);
3861 t
= gimple_assign_rhs2 (def_stmt
);
3862 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3864 linear_step
= tree_to_shwi (t
);
3865 v
= gimple_assign_rhs1 (def_stmt
);
3868 t
= gimple_assign_rhs1 (def_stmt
);
3869 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3870 || (TYPE_PRECISION (TREE_TYPE (v
))
3871 < TYPE_PRECISION (TREE_TYPE (t
))))
/* Chain must terminate in the loop's own IFN_GOMP_SIMD_LANE lane index
   for the argument to count as linear within a simd lane.  */
3880 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3882 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3883 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3888 arginfo
->linear_step
= linear_step
;
3890 arginfo
->simd_lane_linear
= true;
3896 /* Function vectorizable_simd_clone_call.
3898 Check if STMT_INFO performs a function call that can be vectorized
3899 by calling a simd clone of the function.
3900 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3901 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3902 Return true if STMT_INFO is vectorizable in this way. */
3905 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3906 gimple_stmt_iterator
*gsi
,
3907 gimple
**vec_stmt
, slp_tree slp_node
,
3908 stmt_vector_for_cost
*)
3913 tree vec_oprnd0
= NULL_TREE
;
3916 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3917 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3918 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3919 tree fndecl
, new_temp
;
3921 auto_vec
<simd_call_arg_info
> arginfo
;
3922 vec
<tree
> vargs
= vNULL
;
3924 tree lhs
, rtype
, ratype
;
3925 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3926 int masked_call_offset
= 0;
3928 /* Is STMT a vectorizable call? */
3929 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3933 fndecl
= gimple_call_fndecl (stmt
);
3934 if (fndecl
== NULL_TREE
3935 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
3937 fndecl
= gimple_call_arg (stmt
, 0);
3938 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
3939 fndecl
= TREE_OPERAND (fndecl
, 0);
3940 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
3941 masked_call_offset
= 1;
3943 if (fndecl
== NULL_TREE
)
3946 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3947 if (node
== NULL
|| node
->simd_clones
== NULL
)
3950 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3953 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3957 if (gimple_call_lhs (stmt
)
3958 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3961 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3963 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3965 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3968 /* Process function arguments. */
3969 nargs
= gimple_call_num_args (stmt
) - masked_call_offset
;
3971 /* Bail out if the function has zero arguments. */
3975 vec
<tree
>& simd_clone_info
= (slp_node
? SLP_TREE_SIMD_CLONE_INFO (slp_node
)
3976 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info
));
3977 arginfo
.reserve (nargs
, true);
3978 auto_vec
<slp_tree
> slp_op
;
3979 slp_op
.safe_grow_cleared (nargs
);
3981 for (i
= 0; i
< nargs
; i
++)
3983 simd_call_arg_info thisarginfo
;
3986 thisarginfo
.linear_step
= 0;
3987 thisarginfo
.align
= 0;
3988 thisarginfo
.op
= NULL_TREE
;
3989 thisarginfo
.simd_lane_linear
= false;
3991 int op_no
= i
+ masked_call_offset
;
3993 op_no
= vect_slp_child_index_for_operand (stmt
, op_no
, false);
3994 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3995 op_no
, &op
, &slp_op
[i
],
3996 &thisarginfo
.dt
, &thisarginfo
.vectype
)
3997 || thisarginfo
.dt
== vect_uninitialized_def
)
3999 if (dump_enabled_p ())
4000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4001 "use not simple.\n");
4005 if (thisarginfo
.dt
== vect_constant_def
4006 || thisarginfo
.dt
== vect_external_def
)
4008 /* With SLP we determine the vector type of constants/externals
4009 at analysis time, handling conflicts via
4010 vect_maybe_update_slp_op_vectype. At transform time
4011 we have a vector type recorded for SLP. */
4012 gcc_assert (!vec_stmt
4014 || thisarginfo
.vectype
!= NULL_TREE
);
4016 thisarginfo
.vectype
= get_vectype_for_scalar_type (vinfo
,
4021 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4023 /* For linear arguments, the analyze phase should have saved
4024 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4025 if (i
* 3 + 4 <= simd_clone_info
.length ()
4026 && simd_clone_info
[i
* 3 + 2])
4028 gcc_assert (vec_stmt
);
4029 thisarginfo
.linear_step
= tree_to_shwi (simd_clone_info
[i
* 3 + 2]);
4030 thisarginfo
.op
= simd_clone_info
[i
* 3 + 1];
4031 thisarginfo
.simd_lane_linear
4032 = (simd_clone_info
[i
* 3 + 3] == boolean_true_node
);
4033 /* If loop has been peeled for alignment, we need to adjust it. */
4034 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4035 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4036 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4038 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4039 tree step
= simd_clone_info
[i
* 3 + 2];
4040 tree opt
= TREE_TYPE (thisarginfo
.op
);
4041 bias
= fold_convert (TREE_TYPE (step
), bias
);
4042 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4044 = fold_build2 (POINTER_TYPE_P (opt
)
4045 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4046 thisarginfo
.op
, bias
);
4050 && thisarginfo
.dt
!= vect_constant_def
4051 && thisarginfo
.dt
!= vect_external_def
4053 && TREE_CODE (op
) == SSA_NAME
4054 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4056 && tree_fits_shwi_p (iv
.step
))
4058 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4059 thisarginfo
.op
= iv
.base
;
4061 else if ((thisarginfo
.dt
== vect_constant_def
4062 || thisarginfo
.dt
== vect_external_def
)
4063 && POINTER_TYPE_P (TREE_TYPE (op
)))
4064 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4065 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4067 if (POINTER_TYPE_P (TREE_TYPE (op
))
4068 && !thisarginfo
.linear_step
4070 && thisarginfo
.dt
!= vect_constant_def
4071 && thisarginfo
.dt
!= vect_external_def
4073 && TREE_CODE (op
) == SSA_NAME
)
4074 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4076 arginfo
.quick_push (thisarginfo
);
4079 poly_uint64 vf
= loop_vinfo
? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) : 1;
4080 unsigned group_size
= slp_node
? SLP_TREE_LANES (slp_node
) : 1;
4081 unsigned int badness
= 0;
4082 struct cgraph_node
*bestn
= NULL
;
4083 if (simd_clone_info
.exists ())
4084 bestn
= cgraph_node::get (simd_clone_info
[0]);
4086 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4087 n
= n
->simdclone
->next_clone
)
4089 unsigned int this_badness
= 0;
4090 unsigned int num_calls
;
4091 /* The number of arguments in the call and the number of parameters in
4092 the simdclone should match. However, when the simdclone is
4093 'inbranch', it could have one more paramater than nargs when using
4094 an inbranch simdclone to call a non-inbranch call, either in a
4095 non-masked loop using a all true constant mask, or inside a masked
4096 loop using it's mask. */
4097 size_t simd_nargs
= n
->simdclone
->nargs
;
4098 if (!masked_call_offset
&& n
->simdclone
->inbranch
)
4100 if (!constant_multiple_p (vf
* group_size
, n
->simdclone
->simdlen
,
4102 || (!n
->simdclone
->inbranch
&& (masked_call_offset
> 0))
4103 || (nargs
!= simd_nargs
))
4106 this_badness
+= floor_log2 (num_calls
) * 4096;
4107 if (n
->simdclone
->inbranch
)
4108 this_badness
+= 8192;
4109 int target_badness
= targetm
.simd_clone
.usable (n
);
4110 if (target_badness
< 0)
4112 this_badness
+= target_badness
* 512;
4113 for (i
= 0; i
< nargs
; i
++)
4115 switch (n
->simdclone
->args
[i
].arg_type
)
4117 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4118 if (!useless_type_conversion_p
4119 (n
->simdclone
->args
[i
].orig_type
,
4120 TREE_TYPE (gimple_call_arg (stmt
,
4121 i
+ masked_call_offset
))))
4123 else if (arginfo
[i
].dt
== vect_constant_def
4124 || arginfo
[i
].dt
== vect_external_def
4125 || arginfo
[i
].linear_step
)
4128 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4129 if (arginfo
[i
].dt
!= vect_constant_def
4130 && arginfo
[i
].dt
!= vect_external_def
)
4133 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4134 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4135 if (arginfo
[i
].dt
== vect_constant_def
4136 || arginfo
[i
].dt
== vect_external_def
4137 || (arginfo
[i
].linear_step
4138 != n
->simdclone
->args
[i
].linear_step
))
4141 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4142 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4143 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4144 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4145 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4146 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4150 case SIMD_CLONE_ARG_TYPE_MASK
:
4151 /* While we can create a traditional data vector from
4152 an incoming integer mode mask we have no good way to
4153 force generate an integer mode mask from a traditional
4154 boolean vector input. */
4155 if (SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4156 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4158 else if (!SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4159 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4160 this_badness
+= 2048;
4163 if (i
== (size_t) -1)
4165 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4170 if (arginfo
[i
].align
)
4171 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4172 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4174 if (i
== (size_t) -1)
4176 if (masked_call_offset
== 0
4177 && n
->simdclone
->inbranch
4178 && n
->simdclone
->nargs
> nargs
)
4180 gcc_assert (n
->simdclone
->args
[n
->simdclone
->nargs
- 1].arg_type
==
4181 SIMD_CLONE_ARG_TYPE_MASK
);
4182 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4183 not in a branch, as we'd have to construct an all-true mask. */
4184 if (!loop_vinfo
|| !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4187 if (bestn
== NULL
|| this_badness
< badness
)
4190 badness
= this_badness
;
4197 unsigned int num_mask_args
= 0;
4198 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4199 for (i
= 0; i
< nargs
; i
++)
4200 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4203 for (i
= 0; i
< nargs
; i
++)
4205 if ((arginfo
[i
].dt
== vect_constant_def
4206 || arginfo
[i
].dt
== vect_external_def
)
4207 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4209 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
,
4210 i
+ masked_call_offset
));
4211 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4213 if (arginfo
[i
].vectype
== NULL
4214 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4215 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4219 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4220 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4222 if (dump_enabled_p ())
4223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4224 "vector mask arguments are not supported.\n");
4228 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4230 tree clone_arg_vectype
= bestn
->simdclone
->args
[i
].vector_type
;
4231 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4233 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype
),
4234 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4236 /* FORNOW we only have partial support for vector-type masks
4237 that can't hold all of simdlen. */
4238 if (dump_enabled_p ())
4239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4241 "in-branch vector clones are not yet"
4242 " supported for mismatched vector sizes.\n");
4245 if (!expand_vec_cond_expr_p (clone_arg_vectype
,
4246 arginfo
[i
].vectype
, ERROR_MARK
))
4248 if (dump_enabled_p ())
4249 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4251 "cannot compute mask argument for"
4252 " in-branch vector clones.\n");
4256 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4258 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
))
4259 || maybe_ne (exact_div (bestn
->simdclone
->simdlen
,
4261 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4263 /* FORNOW we only have partial support for integer-type masks
4264 that represent the same number of lanes as the
4265 vectorized mask inputs. */
4266 if (dump_enabled_p ())
4267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4269 "in-branch vector clones are not yet "
4270 "supported for mismatched vector sizes.\n");
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4279 "in-branch vector clones not supported"
4280 " on this target.\n");
4286 fndecl
= bestn
->decl
;
4287 nunits
= bestn
->simdclone
->simdlen
;
4289 ncopies
= vector_unroll_factor (vf
* group_size
, nunits
);
4291 ncopies
= vector_unroll_factor (vf
, nunits
);
4293 /* If the function isn't const, only allow it in simd loops where user
4294 has asserted that at least nunits consecutive iterations can be
4295 performed using SIMD instructions. */
4296 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4297 && gimple_vuse (stmt
))
4300 /* Sanity check: make sure that at least one copy of the vectorized stmt
4301 needs to be generated. */
4302 gcc_assert (ncopies
>= 1);
4304 if (!vec_stmt
) /* transformation not required. */
4307 for (unsigned i
= 0; i
< nargs
; ++i
)
4308 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], arginfo
[i
].vectype
))
4310 if (dump_enabled_p ())
4311 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4312 "incompatible vector types for invariants\n");
4315 /* When the original call is pure or const but the SIMD ABI dictates
4316 an aggregate return we will have to use a virtual definition and
4317 in a loop eventually even need to add a virtual PHI. That's
4318 not straight-forward so allow to fix this up via renaming. */
4319 if (gimple_call_lhs (stmt
)
4320 && !gimple_vdef (stmt
)
4321 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4322 vinfo
->any_known_not_updated_vssa
= true;
4323 /* ??? For SLP code-gen we end up inserting after the last
4324 vector argument def rather than at the original call position
4325 so automagic virtual operand updating doesn't work. */
4326 if (gimple_vuse (stmt
) && slp_node
)
4327 vinfo
->any_known_not_updated_vssa
= true;
4328 simd_clone_info
.safe_push (bestn
->decl
);
4329 for (i
= 0; i
< bestn
->simdclone
->nargs
; i
++)
4331 switch (bestn
->simdclone
->args
[i
].arg_type
)
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4338 simd_clone_info
.safe_grow_cleared (i
* 3 + 1, true);
4339 simd_clone_info
.safe_push (arginfo
[i
].op
);
4340 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4341 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4342 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4343 simd_clone_info
.safe_push (ls
);
4344 tree sll
= arginfo
[i
].simd_lane_linear
4345 ? boolean_true_node
: boolean_false_node
;
4346 simd_clone_info
.safe_push (sll
);
4349 case SIMD_CLONE_ARG_TYPE_MASK
:
4351 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4354 = exact_div (ncopies
* bestn
->simdclone
->simdlen
,
4355 TYPE_VECTOR_SUBPARTS (vectype
)).to_constant ();
4356 vect_record_loop_mask (loop_vinfo
,
4357 &LOOP_VINFO_MASKS (loop_vinfo
),
4358 nmasks
, vectype
, op
);
4365 if (!bestn
->simdclone
->inbranch
&& loop_vinfo
)
4367 if (dump_enabled_p ()
4368 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4369 dump_printf_loc (MSG_NOTE
, vect_location
,
4370 "can't use a fully-masked loop because a"
4371 " non-masked simd clone was selected.\n");
4372 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
4375 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4376 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4377 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4378 dt, slp_node, cost_vec); */
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4388 scalar_dest
= gimple_call_lhs (stmt
);
4389 vec_dest
= NULL_TREE
;
4394 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4395 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4396 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4399 rtype
= TREE_TYPE (ratype
);
4403 auto_vec
<vec
<tree
> > vec_oprnds
;
4404 auto_vec
<unsigned> vec_oprnds_i
;
4405 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4408 vec_oprnds
.reserve_exact (nargs
);
4409 vect_get_slp_defs (vinfo
, slp_node
, &vec_oprnds
);
4412 vec_oprnds
.safe_grow_cleared (nargs
, true);
4413 for (j
= 0; j
< ncopies
; ++j
)
4415 poly_uint64 callee_nelements
;
4416 poly_uint64 caller_nelements
;
4417 /* Build argument list for the vectorized call. */
4419 vargs
.create (nargs
);
4423 for (i
= 0; i
< nargs
; i
++)
4425 unsigned int k
, l
, m
, o
;
4427 op
= gimple_call_arg (stmt
, i
+ masked_call_offset
);
4428 switch (bestn
->simdclone
->args
[i
].arg_type
)
4430 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4431 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4432 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4433 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4434 o
= vector_unroll_factor (nunits
, callee_nelements
);
4435 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4437 if (known_lt (callee_nelements
, caller_nelements
))
4439 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4440 if (!constant_multiple_p (caller_nelements
,
4441 callee_nelements
, &k
))
4444 gcc_assert ((k
& (k
- 1)) == 0);
4448 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4449 ncopies
* o
/ k
, op
,
4451 vec_oprnds_i
[i
] = 0;
4452 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4456 vec_oprnd0
= arginfo
[i
].op
;
4457 if ((m
& (k
- 1)) == 0)
4458 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4460 arginfo
[i
].op
= vec_oprnd0
;
4462 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4464 bitsize_int ((m
& (k
- 1)) * prec
));
4466 = gimple_build_assign (make_ssa_name (atype
),
4468 vect_finish_stmt_generation (vinfo
, stmt_info
,
4470 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4474 if (!constant_multiple_p (callee_nelements
,
4475 caller_nelements
, &k
))
4477 gcc_assert ((k
& (k
- 1)) == 0);
4478 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4480 vec_alloc (ctor_elts
, k
);
4483 for (l
= 0; l
< k
; l
++)
4485 if (m
== 0 && l
== 0)
4488 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4492 vec_oprnds_i
[i
] = 0;
4493 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4496 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4497 arginfo
[i
].op
= vec_oprnd0
;
4500 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4504 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4507 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, atype
,
4510 = gimple_build_assign (make_ssa_name (atype
),
4512 vect_finish_stmt_generation (vinfo
, stmt_info
,
4514 vargs
.safe_push (gimple_get_lhs (new_stmt
));
4517 vargs
.safe_push (vec_oprnd0
);
4520 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4522 = gimple_build_assign (make_ssa_name (atype
),
4524 vect_finish_stmt_generation (vinfo
, stmt_info
,
4526 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4531 case SIMD_CLONE_ARG_TYPE_MASK
:
4532 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4534 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4535 tree elt_type
= TREE_TYPE (atype
);
4536 tree one
= fold_convert (elt_type
, integer_one_node
);
4537 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4538 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4539 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4540 o
= vector_unroll_factor (nunits
, callee_nelements
);
4541 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4543 if (maybe_lt (callee_nelements
, caller_nelements
))
4545 /* The mask type has fewer elements than simdlen. */
4550 else if (known_eq (callee_nelements
, caller_nelements
))
4552 /* The SIMD clone function has the same number of
4553 elements as the current function. */
4557 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4561 vec_oprnds_i
[i
] = 0;
4563 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4565 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4567 vec_loop_masks
*loop_masks
4568 = &LOOP_VINFO_MASKS (loop_vinfo
);
4570 = vect_get_loop_mask (loop_vinfo
, gsi
,
4571 loop_masks
, ncopies
,
4574 = prepare_vec_mask (loop_vinfo
,
4575 TREE_TYPE (loop_mask
),
4576 loop_mask
, vec_oprnd0
,
4578 loop_vinfo
->vec_cond_masked_set
.add ({ vec_oprnd0
,
4583 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4584 build_vector_from_val (atype
, one
),
4585 build_vector_from_val (atype
, zero
));
4587 = gimple_build_assign (make_ssa_name (atype
),
4589 vect_finish_stmt_generation (vinfo
, stmt_info
,
4591 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4595 /* The mask type has more elements than simdlen. */
4602 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4604 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4605 /* Guess the number of lanes represented by atype. */
4606 poly_uint64 atype_subparts
4607 = exact_div (bestn
->simdclone
->simdlen
,
4609 o
= vector_unroll_factor (nunits
, atype_subparts
);
4610 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4615 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4619 vec_oprnds_i
[i
] = 0;
4621 if (maybe_lt (atype_subparts
,
4622 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4624 /* The mask argument has fewer elements than the
4629 else if (known_eq (atype_subparts
,
4630 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4632 /* The vector mask argument matches the input
4633 in the number of lanes, but not necessarily
4635 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4636 tree st
= lang_hooks
.types
.type_for_mode
4637 (TYPE_MODE (TREE_TYPE (vec_oprnd0
)), 1);
4638 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, st
,
4641 = gimple_build_assign (make_ssa_name (st
),
4643 vect_finish_stmt_generation (vinfo
, stmt_info
,
4645 if (!types_compatible_p (atype
, st
))
4648 = gimple_build_assign (make_ssa_name (atype
),
4652 vect_finish_stmt_generation (vinfo
, stmt_info
,
4655 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4659 /* The mask argument has more elements than the
4669 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4670 vargs
.safe_push (op
);
4672 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4673 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4678 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4679 &stmts
, true, NULL_TREE
);
4683 edge pe
= loop_preheader_edge (loop
);
4684 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4685 gcc_assert (!new_bb
);
4687 if (arginfo
[i
].simd_lane_linear
)
4689 vargs
.safe_push (arginfo
[i
].op
);
4692 tree phi_res
= copy_ssa_name (op
);
4693 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4694 add_phi_arg (new_phi
, arginfo
[i
].op
,
4695 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4697 = POINTER_TYPE_P (TREE_TYPE (op
))
4698 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4699 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4700 ? sizetype
: TREE_TYPE (op
);
4702 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4704 tree tcst
= wide_int_to_tree (type
, cst
);
4705 tree phi_arg
= copy_ssa_name (op
);
4707 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4708 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4709 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4710 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4712 arginfo
[i
].op
= phi_res
;
4713 vargs
.safe_push (phi_res
);
4718 = POINTER_TYPE_P (TREE_TYPE (op
))
4719 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4720 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4721 ? sizetype
: TREE_TYPE (op
);
4723 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4725 tree tcst
= wide_int_to_tree (type
, cst
);
4726 new_temp
= make_ssa_name (TREE_TYPE (op
));
4728 = gimple_build_assign (new_temp
, code
,
4729 arginfo
[i
].op
, tcst
);
4730 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4731 vargs
.safe_push (new_temp
);
4734 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4735 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4736 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4737 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4738 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4739 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4745 if (masked_call_offset
== 0
4746 && bestn
->simdclone
->inbranch
4747 && bestn
->simdclone
->nargs
> nargs
)
4750 size_t mask_i
= bestn
->simdclone
->nargs
- 1;
4752 gcc_assert (bestn
->simdclone
->args
[mask_i
].arg_type
==
4753 SIMD_CLONE_ARG_TYPE_MASK
);
4755 tree masktype
= bestn
->simdclone
->args
[mask_i
].vector_type
;
4756 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4757 /* Guess the number of lanes represented by masktype. */
4758 callee_nelements
= exact_div (bestn
->simdclone
->simdlen
,
4759 bestn
->simdclone
->nargs
- nargs
);
4761 callee_nelements
= TYPE_VECTOR_SUBPARTS (masktype
);
4762 o
= vector_unroll_factor (nunits
, callee_nelements
);
4763 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4765 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4767 vec_loop_masks
*loop_masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
4768 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
4769 ncopies
, vectype
, j
);
4772 mask
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
4775 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4777 /* This means we are dealing with integer mask modes.
4778 First convert to an integer type with the same size as
4779 the current vector type. */
4780 unsigned HOST_WIDE_INT intermediate_size
4781 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask
)));
4783 build_nonstandard_integer_type (intermediate_size
, 1);
4784 mask
= build1 (VIEW_CONVERT_EXPR
, mid_int_type
, mask
);
4786 = gimple_build_assign (make_ssa_name (mid_int_type
),
4788 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4789 /* Then zero-extend to the mask mode. */
4790 mask
= fold_build1 (NOP_EXPR
, masktype
,
4791 gimple_get_lhs (new_stmt
));
4793 else if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4795 tree one
= fold_convert (TREE_TYPE (masktype
),
4797 tree zero
= fold_convert (TREE_TYPE (masktype
),
4799 mask
= build3 (VEC_COND_EXPR
, masktype
, mask
,
4800 build_vector_from_val (masktype
, one
),
4801 build_vector_from_val (masktype
, zero
));
4806 new_stmt
= gimple_build_assign (make_ssa_name (masktype
), mask
);
4807 vect_finish_stmt_generation (vinfo
, stmt_info
,
4809 mask
= gimple_assign_lhs (new_stmt
);
4810 vargs
.safe_push (mask
);
4814 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4818 || known_eq (TYPE_VECTOR_SUBPARTS (rtype
), nunits
));
4820 new_temp
= create_tmp_var (ratype
);
4821 else if (useless_type_conversion_p (vectype
, rtype
))
4822 new_temp
= make_ssa_name (vec_dest
, new_call
);
4824 new_temp
= make_ssa_name (rtype
, new_call
);
4825 gimple_call_set_lhs (new_call
, new_temp
);
4827 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4828 gimple
*new_stmt
= new_call
;
4832 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
4835 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4836 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4837 k
= vector_unroll_factor (nunits
,
4838 TYPE_VECTOR_SUBPARTS (vectype
));
4839 gcc_assert ((k
& (k
- 1)) == 0);
4840 for (l
= 0; l
< k
; l
++)
4845 t
= build_fold_addr_expr (new_temp
);
4846 t
= build2 (MEM_REF
, vectype
, t
,
4847 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4850 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4851 bitsize_int (prec
), bitsize_int (l
* prec
));
4852 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4853 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4855 if (j
== 0 && l
== 0)
4856 *vec_stmt
= new_stmt
;
4858 SLP_TREE_VEC_DEFS (slp_node
)
4859 .quick_push (gimple_assign_lhs (new_stmt
));
4861 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4865 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4868 else if (!multiple_p (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
4871 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype
),
4872 TYPE_VECTOR_SUBPARTS (rtype
), &k
))
4874 gcc_assert ((k
& (k
- 1)) == 0);
4875 if ((j
& (k
- 1)) == 0)
4876 vec_alloc (ret_ctor_elts
, k
);
4880 o
= vector_unroll_factor (nunits
,
4881 TYPE_VECTOR_SUBPARTS (rtype
));
4882 for (m
= 0; m
< o
; m
++)
4884 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4885 size_int (m
), NULL_TREE
, NULL_TREE
);
4886 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4888 vect_finish_stmt_generation (vinfo
, stmt_info
,
4890 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4891 gimple_assign_lhs (new_stmt
));
4893 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4896 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4897 if ((j
& (k
- 1)) != k
- 1)
4899 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4901 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4902 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4904 if ((unsigned) j
== k
- 1)
4905 *vec_stmt
= new_stmt
;
4907 SLP_TREE_VEC_DEFS (slp_node
)
4908 .quick_push (gimple_assign_lhs (new_stmt
));
4910 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4915 tree t
= build_fold_addr_expr (new_temp
);
4916 t
= build2 (MEM_REF
, vectype
, t
,
4917 build_int_cst (TREE_TYPE (t
), 0));
4918 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4919 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4920 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4922 else if (!useless_type_conversion_p (vectype
, rtype
))
4924 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4926 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4927 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4932 *vec_stmt
= new_stmt
;
4934 SLP_TREE_VEC_DEFS (slp_node
).quick_push (gimple_get_lhs (new_stmt
));
4936 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4939 for (i
= 0; i
< nargs
; ++i
)
4941 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4946 /* Mark the clone as no longer being a candidate for GC. */
4947 bestn
->gc_candidate
= false;
4949 /* The call in STMT might prevent it from being removed in dce.
4950 We however cannot remove it here, due to the way the ssa name
4951 it defines is mapped to the new definition. So just replace
4952 rhs of the statement with something harmless. */
4960 type
= TREE_TYPE (scalar_dest
);
4961 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4962 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4965 new_stmt
= gimple_build_nop ();
4966 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4967 unlink_stmt_vdef (stmt
);
4973 /* Function vect_gen_widened_results_half
4975 Create a vector stmt whose code, type, number of arguments, and result
4976 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4977 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4978 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4979 needs to be created (DECL is a function-decl of a target-builtin).
4980 STMT_INFO is the original scalar stmt that we are vectorizing. */
4983 vect_gen_widened_results_half (vec_info
*vinfo
, code_helper ch
,
4984 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4985 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4986 stmt_vec_info stmt_info
)
4991 /* Generate half of the widened result: */
4992 if (op_type
!= binary_op
)
4994 new_stmt
= vect_gimple_build (vec_dest
, ch
, vec_oprnd0
, vec_oprnd1
);
4995 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4996 gimple_set_lhs (new_stmt
, new_temp
);
4997 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5003 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5004 For multi-step conversions store the resulting vectors and call the function
5005 recursively. When NARROW_SRC_P is true, there's still a conversion after
5006 narrowing, don't store the vectors in the SLP_NODE or in vector info of
5007 the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
5010 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
5012 stmt_vec_info stmt_info
,
5013 vec
<tree
> &vec_dsts
,
5014 gimple_stmt_iterator
*gsi
,
5015 slp_tree slp_node
, code_helper code
,
5019 tree vop0
, vop1
, new_tmp
, vec_dest
;
5021 vec_dest
= vec_dsts
.pop ();
5023 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
5025 /* Create demotion operation. */
5026 vop0
= (*vec_oprnds
)[i
];
5027 vop1
= (*vec_oprnds
)[i
+ 1];
5028 gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
5029 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
5030 gimple_set_lhs (new_stmt
, new_tmp
);
5031 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5032 if (multi_step_cvt
|| narrow_src_p
)
5033 /* Store the resulting vector for next recursive call,
5034 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5035 (*vec_oprnds
)[i
/2] = new_tmp
;
5038 /* This is the last step of the conversion sequence. Store the
5039 vectors in SLP_NODE or in vector info of the scalar statement
5040 (or in STMT_VINFO_RELATED_STMT chain). */
5042 slp_node
->push_vec_def (new_stmt
);
5044 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5048 /* For multi-step demotion operations we first generate demotion operations
5049 from the source type to the intermediate types, and then combine the
5050 results (stored in VEC_OPRNDS) in demotion operation to the destination
5054 /* At each level of recursion we have half of the operands we had at the
5056 vec_oprnds
->truncate ((i
+1)/2);
5057 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
5059 stmt_info
, vec_dsts
, gsi
,
5060 slp_node
, VEC_PACK_TRUNC_EXPR
,
5064 vec_dsts
.quick_push (vec_dest
);
5068 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5069 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5070 STMT_INFO. For multi-step conversions store the resulting vectors and
5071 call the function recursively. */
5074 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
5075 vec
<tree
> *vec_oprnds0
,
5076 vec
<tree
> *vec_oprnds1
,
5077 stmt_vec_info stmt_info
, tree vec_dest
,
5078 gimple_stmt_iterator
*gsi
,
5080 code_helper ch2
, int op_type
)
5083 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
5084 gimple
*new_stmt1
, *new_stmt2
;
5085 vec
<tree
> vec_tmp
= vNULL
;
5087 vec_tmp
.create (vec_oprnds0
->length () * 2);
5088 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5090 if (op_type
== binary_op
)
5091 vop1
= (*vec_oprnds1
)[i
];
5095 /* Generate the two halves of promotion operation. */
5096 new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5097 op_type
, vec_dest
, gsi
,
5099 new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5100 op_type
, vec_dest
, gsi
,
5102 if (is_gimple_call (new_stmt1
))
5104 new_tmp1
= gimple_call_lhs (new_stmt1
);
5105 new_tmp2
= gimple_call_lhs (new_stmt2
);
5109 new_tmp1
= gimple_assign_lhs (new_stmt1
);
5110 new_tmp2
= gimple_assign_lhs (new_stmt2
);
5113 /* Store the results for the next step. */
5114 vec_tmp
.quick_push (new_tmp1
);
5115 vec_tmp
.quick_push (new_tmp2
);
5118 vec_oprnds0
->release ();
5119 *vec_oprnds0
= vec_tmp
;
5122 /* Create vectorized promotion stmts for widening stmts using only half the
5123 potential vector size for input. */
5125 vect_create_half_widening_stmts (vec_info
*vinfo
,
5126 vec
<tree
> *vec_oprnds0
,
5127 vec
<tree
> *vec_oprnds1
,
5128 stmt_vec_info stmt_info
, tree vec_dest
,
5129 gimple_stmt_iterator
*gsi
,
5138 vec
<tree
> vec_tmp
= vNULL
;
5140 vec_tmp
.create (vec_oprnds0
->length ());
5141 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5143 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
5145 gcc_assert (op_type
== binary_op
);
5146 vop1
= (*vec_oprnds1
)[i
];
5148 /* Widen the first vector input. */
5149 out_type
= TREE_TYPE (vec_dest
);
5150 new_tmp1
= make_ssa_name (out_type
);
5151 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5152 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
5153 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5155 /* Widen the second vector input. */
5156 new_tmp2
= make_ssa_name (out_type
);
5157 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5158 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5159 /* Perform the operation. With both vector inputs widened. */
5160 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5164 /* Perform the operation. With the single vector input widened. */
5165 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
5168 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5169 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5170 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5172 /* Store the results for the next step. */
5173 vec_tmp
.quick_push (new_tmp3
);
5176 vec_oprnds0
->release ();
5177 *vec_oprnds0
= vec_tmp
;
5181 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5182 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5183 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5184 Return true if STMT_INFO is vectorizable in this way. */
5187 vectorizable_conversion (vec_info
*vinfo
,
5188 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5189 gimple
**vec_stmt
, slp_tree slp_node
,
5190 stmt_vector_for_cost
*cost_vec
)
5192 tree vec_dest
, cvt_op
= NULL_TREE
;
5194 tree op0
, op1
= NULL_TREE
;
5195 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5197 code_helper code
, code1
, code2
;
5198 code_helper codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
5200 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5202 poly_uint64 nunits_in
;
5203 poly_uint64 nunits_out
;
5204 tree vectype_out
, vectype_in
;
5206 tree lhs_type
, rhs_type
;
5207 /* For conversions between floating point and integer, there're 2 NARROW
5208 cases. NARROW_SRC is for FLOAT_EXPR, means
5209 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5210 This is safe when the range of the source integer can fit into the lower
5211 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5212 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5213 For other conversions, when there's narrowing, NARROW_DST is used as
5215 enum { NARROW_SRC
, NARROW_DST
, NONE
, WIDEN
} modifier
;
5216 vec
<tree
> vec_oprnds0
= vNULL
;
5217 vec
<tree
> vec_oprnds1
= vNULL
;
5219 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5220 int multi_step_cvt
= 0;
5221 vec
<tree
> interm_types
= vNULL
;
5222 tree intermediate_type
, cvt_type
= NULL_TREE
;
5224 unsigned short fltsz
;
5226 /* Is STMT a vectorizable conversion? */
5228 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5231 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5235 gimple
* stmt
= stmt_info
->stmt
;
5236 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5239 if (gimple_get_lhs (stmt
) == NULL_TREE
5240 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5243 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5246 if (is_gimple_assign (stmt
))
5248 code
= gimple_assign_rhs_code (stmt
);
5249 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5251 else if (gimple_call_internal_p (stmt
))
5253 code
= gimple_call_internal_fn (stmt
);
5254 op_type
= gimple_call_num_args (stmt
);
5259 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5260 || code
== WIDEN_LSHIFT_EXPR
5261 || widening_fn_p (code
));
5264 && !CONVERT_EXPR_CODE_P (code
)
5265 && code
!= FIX_TRUNC_EXPR
5266 && code
!= FLOAT_EXPR
)
5269 /* Check types of lhs and rhs. */
5270 scalar_dest
= gimple_get_lhs (stmt
);
5271 lhs_type
= TREE_TYPE (scalar_dest
);
5272 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5274 /* Check the operands of the operation. */
5275 slp_tree slp_op0
, slp_op1
= NULL
;
5276 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5277 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5281 "use not simple.\n");
5285 rhs_type
= TREE_TYPE (op0
);
5286 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5287 && !((INTEGRAL_TYPE_P (lhs_type
)
5288 && INTEGRAL_TYPE_P (rhs_type
))
5289 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5290 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5293 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5294 && ((INTEGRAL_TYPE_P (lhs_type
)
5295 && !type_has_mode_precision_p (lhs_type
))
5296 || (INTEGRAL_TYPE_P (rhs_type
)
5297 && !type_has_mode_precision_p (rhs_type
))))
5299 if (dump_enabled_p ())
5300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5301 "type conversion to/from bit-precision unsupported."
5306 if (op_type
== binary_op
)
5308 gcc_assert (code
== WIDEN_MULT_EXPR
5309 || code
== WIDEN_LSHIFT_EXPR
5310 || widening_fn_p (code
));
5312 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5313 gimple_call_arg (stmt
, 0);
5315 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5316 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5320 "use not simple.\n");
5323 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5326 vectype_in
= vectype1_in
;
5329 /* If op0 is an external or constant def, infer the vector type
5330 from the scalar type. */
5332 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5334 gcc_assert (vectype_in
);
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5339 "no vectype for scalar type %T\n", rhs_type
);
5344 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5345 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5347 if (dump_enabled_p ())
5348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5349 "can't convert between boolean and non "
5350 "boolean vectors %T\n", rhs_type
);
5355 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5356 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5357 if (known_eq (nunits_out
, nunits_in
))
5362 else if (multiple_p (nunits_out
, nunits_in
))
5363 modifier
= NARROW_DST
;
5366 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5370 /* Multiple types in SLP are handled by creating the appropriate number of
5371 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5375 else if (modifier
== NARROW_DST
)
5376 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5378 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5380 /* Sanity check: make sure that at least one copy of the vectorized stmt
5381 needs to be generated. */
5382 gcc_assert (ncopies
>= 1);
5384 bool found_mode
= false;
5385 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5386 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5387 opt_scalar_mode rhs_mode_iter
;
5388 vec
<std::pair
<tree
, tree_code
> > converts
= vNULL
;
5390 /* Supportable by target? */
5394 if (code
!= FIX_TRUNC_EXPR
5395 && code
!= FLOAT_EXPR
5396 && !CONVERT_EXPR_CODE_P (code
))
5398 gcc_assert (code
.is_tree_code ());
5399 if (supportable_indirect_convert_operation (code
,
5405 gcc_assert (converts
.length () <= 2);
5406 if (converts
.length () == 1)
5407 code1
= converts
[0].second
;
5410 cvt_type
= NULL_TREE
;
5411 multi_step_cvt
= converts
.length () - 1;
5412 codecvt1
= converts
[0].second
;
5413 code1
= converts
[1].second
;
5414 interm_types
.safe_push (converts
[0].first
);
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5423 "conversion not supported by target.\n");
5427 if (known_eq (nunits_in
, nunits_out
))
5429 if (!(code
.is_tree_code ()
5430 && supportable_half_widening_operation ((tree_code
) code
,
5431 vectype_out
, vectype_in
,
5435 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5438 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5439 vectype_out
, vectype_in
, &code1
,
5440 &code2
, &multi_step_cvt
,
5443 /* Binary widening operation can only be supported directly by the
5445 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5449 if (code
!= FLOAT_EXPR
5450 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5453 fltsz
= GET_MODE_SIZE (lhs_mode
);
5454 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5456 rhs_mode
= rhs_mode_iter
.require ();
5457 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5461 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5462 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5463 if (cvt_type
== NULL_TREE
)
5466 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5469 gcc_assert (code
.is_tree_code ());
5470 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5475 else if (!supportable_widening_operation (vinfo
, code
,
5476 stmt_info
, vectype_out
,
5477 cvt_type
, &codecvt1
,
5478 &codecvt2
, &multi_step_cvt
,
5482 gcc_assert (multi_step_cvt
== 0);
5484 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5487 &code2
, &multi_step_cvt
,
5498 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5499 codecvt2
= ERROR_MARK
;
5503 interm_types
.safe_push (cvt_type
);
5504 cvt_type
= NULL_TREE
;
5509 gcc_assert (op_type
== unary_op
);
5510 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5511 &code1
, &multi_step_cvt
,
5515 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5518 if (code
== FIX_TRUNC_EXPR
)
5521 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5522 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5523 if (cvt_type
== NULL_TREE
)
5525 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5530 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5531 &code1
, &multi_step_cvt
,
5535 /* If op0 can be represented with low precision integer,
5536 truncate it to cvt_type and the do FLOAT_EXPR. */
5537 else if (code
== FLOAT_EXPR
)
5539 wide_int op_min_value
, op_max_value
;
5540 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5544 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5545 if (cvt_type
== NULL_TREE
5546 || (wi::min_precision (op_max_value
, SIGNED
)
5547 > TYPE_PRECISION (cvt_type
))
5548 || (wi::min_precision (op_min_value
, SIGNED
)
5549 > TYPE_PRECISION (cvt_type
)))
5552 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5553 if (cvt_type
== NULL_TREE
)
5555 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5556 &code1
, &multi_step_cvt
,
5559 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5563 modifier
= NARROW_SRC
;
5574 if (!vec_stmt
) /* transformation not required. */
5577 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5578 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5582 "incompatible vector types for invariants\n");
5585 DUMP_VECT_SCOPE ("vectorizable_conversion");
5586 if (modifier
== NONE
)
5588 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5589 vect_model_simple_cost (vinfo
, stmt_info
,
5590 ncopies
* (1 + multi_step_cvt
),
5591 dt
, ndts
, slp_node
, cost_vec
);
5593 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5595 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5596 /* The final packing step produces one vector result per copy. */
5597 unsigned int nvectors
5598 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5599 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5600 multi_step_cvt
, cost_vec
,
5605 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5606 /* The initial unpacking step produces two vector results
5607 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5608 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5609 unsigned int nvectors
5611 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5613 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5614 multi_step_cvt
, cost_vec
,
5617 interm_types
.release ();
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_NOTE
, vect_location
,
5624 "transform conversion. ncopies = %d.\n", ncopies
);
5626 if (op_type
== binary_op
)
5628 if (CONSTANT_CLASS_P (op0
))
5629 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5630 else if (CONSTANT_CLASS_P (op1
))
5631 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5634 /* In case of multi-step conversion, we first generate conversion operations
5635 to the intermediate types, and then from that types to the final one.
5636 We create vector destinations for the intermediate type (TYPES) received
5637 from supportable_*_operation, and store them in the correct order
5638 for future use in vect_create_vectorized_*_stmts (). */
5639 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5640 bool widen_or_narrow_float_p
5641 = cvt_type
&& (modifier
== WIDEN
|| modifier
== NARROW_SRC
);
5642 vec_dest
= vect_create_destination_var (scalar_dest
,
5643 widen_or_narrow_float_p
5644 ? cvt_type
: vectype_out
);
5645 vec_dsts
.quick_push (vec_dest
);
5649 for (i
= interm_types
.length () - 1;
5650 interm_types
.iterate (i
, &intermediate_type
); i
--)
5652 vec_dest
= vect_create_destination_var (scalar_dest
,
5654 vec_dsts
.quick_push (vec_dest
);
5659 vec_dest
= vect_create_destination_var (scalar_dest
,
5660 widen_or_narrow_float_p
5661 ? vectype_out
: cvt_type
);
5666 if (modifier
== WIDEN
)
5668 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5671 ninputs
= vect_pow2 (multi_step_cvt
);
5679 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5680 op0
, vectype_in
, &vec_oprnds0
);
5681 /* vec_dest is intermediate type operand when multi_step_cvt. */
5685 vec_dest
= vec_dsts
[0];
5688 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5690 /* Arguments are ready, create the new vector stmt. */
5694 gcc_assert (multi_step_cvt
== 1);
5695 new_stmt
= vect_gimple_build (cvt_op
, codecvt1
, vop0
);
5696 new_temp
= make_ssa_name (cvt_op
, new_stmt
);
5697 gimple_assign_set_lhs (new_stmt
, new_temp
);
5698 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5701 new_stmt
= vect_gimple_build (vec_dest
, code1
, vop0
);
5702 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5703 gimple_set_lhs (new_stmt
, new_temp
);
5704 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5707 slp_node
->push_vec_def (new_stmt
);
5709 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5714 /* In case the vectorization factor (VF) is bigger than the number
5715 of elements that we can fit in a vectype (nunits), we have to
5716 generate more than one vector stmt - i.e - we need to "unroll"
5717 the vector stmt by a factor VF/nunits. */
5718 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5719 op0
, vectype_in
, &vec_oprnds0
,
5720 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5721 vectype_in
, &vec_oprnds1
);
5722 if (code
== WIDEN_LSHIFT_EXPR
)
5724 int oprnds_size
= vec_oprnds0
.length ();
5725 vec_oprnds1
.create (oprnds_size
);
5726 for (i
= 0; i
< oprnds_size
; ++i
)
5727 vec_oprnds1
.quick_push (op1
);
5729 /* Arguments are ready. Create the new vector stmts. */
5730 for (i
= multi_step_cvt
; i
>= 0; i
--)
5732 tree this_dest
= vec_dsts
[i
];
5733 code_helper c1
= code1
, c2
= code2
;
5734 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5739 if (known_eq (nunits_out
, nunits_in
))
5740 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
, &vec_oprnds1
,
5741 stmt_info
, this_dest
, gsi
, c1
,
5744 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5745 &vec_oprnds1
, stmt_info
,
5750 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5755 new_temp
= make_ssa_name (vec_dest
);
5756 new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5757 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5760 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5763 slp_node
->push_vec_def (new_stmt
);
5765 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5771 /* In case the vectorization factor (VF) is bigger than the number
5772 of elements that we can fit in a vectype (nunits), we have to
5773 generate more than one vector stmt - i.e - we need to "unroll"
5774 the vector stmt by a factor VF/nunits. */
5775 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5776 op0
, vectype_in
, &vec_oprnds0
);
5777 /* Arguments are ready. Create the new vector stmts. */
5778 if (cvt_type
&& modifier
== NARROW_DST
)
5779 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5781 new_temp
= make_ssa_name (vec_dest
);
5782 gimple
*new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5783 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5784 vec_oprnds0
[i
] = new_temp
;
5787 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5789 stmt_info
, vec_dsts
, gsi
,
5791 modifier
== NARROW_SRC
);
5792 /* After demoting op0 to cvt_type, convert it to dest. */
5793 if (cvt_type
&& code
== FLOAT_EXPR
)
5795 for (unsigned int i
= 0; i
!= vec_oprnds0
.length() / 2; i
++)
5797 /* Arguments are ready, create the new vector stmt. */
5798 gcc_assert (TREE_CODE_LENGTH ((tree_code
) codecvt1
) == unary_op
);
5800 = vect_gimple_build (vec_dest
, codecvt1
, vec_oprnds0
[i
]);
5801 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5802 gimple_set_lhs (new_stmt
, new_temp
);
5803 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5805 /* This is the last step of the conversion sequence. Store the
5806 vectors in SLP_NODE or in vector info of the scalar statement
5807 (or in STMT_VINFO_RELATED_STMT chain). */
5809 slp_node
->push_vec_def (new_stmt
);
5811 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5817 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5819 vec_oprnds0
.release ();
5820 vec_oprnds1
.release ();
5821 interm_types
.release ();
5826 /* Return true if we can assume from the scalar form of STMT_INFO that
5827 neither the scalar nor the vector forms will generate code. STMT_INFO
5828 is known not to involve a data reference. */
5831 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5833 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5837 tree lhs
= gimple_assign_lhs (stmt
);
5838 tree_code code
= gimple_assign_rhs_code (stmt
);
5839 tree rhs
= gimple_assign_rhs1 (stmt
);
5841 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5844 if (CONVERT_EXPR_CODE_P (code
))
5845 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5850 /* Function vectorizable_assignment.
5852 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5853 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5854 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5855 Return true if STMT_INFO is vectorizable in this way. */
5858 vectorizable_assignment (vec_info
*vinfo
,
5859 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5860 gimple
**vec_stmt
, slp_tree slp_node
,
5861 stmt_vector_for_cost
*cost_vec
)
5866 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5868 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5872 vec
<tree
> vec_oprnds
= vNULL
;
5874 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5875 enum tree_code code
;
5878 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5881 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5885 /* Is vectorizable assignment? */
5886 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5890 scalar_dest
= gimple_assign_lhs (stmt
);
5891 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5894 if (STMT_VINFO_DATA_REF (stmt_info
))
5897 code
= gimple_assign_rhs_code (stmt
);
5898 if (!(gimple_assign_single_p (stmt
)
5899 || code
== PAREN_EXPR
5900 || CONVERT_EXPR_CODE_P (code
)))
5903 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5904 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5906 /* Multiple types in SLP are handled by creating the appropriate number of
5907 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5912 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5914 gcc_assert (ncopies
>= 1);
5917 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5918 &dt
[0], &vectype_in
))
5920 if (dump_enabled_p ())
5921 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5922 "use not simple.\n");
5926 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5928 /* We can handle VIEW_CONVERT conversions that do not change the number
5929 of elements or the vector size or other conversions when the component
5930 types are nop-convertible. */
5932 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5933 || (code
== VIEW_CONVERT_EXPR
5934 && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5935 GET_MODE_SIZE (TYPE_MODE (vectype_in
))))
5936 || (CONVERT_EXPR_CODE_P (code
)
5937 && !tree_nop_conversion_p (TREE_TYPE (vectype
),
5938 TREE_TYPE (vectype_in
))))
5941 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5943 if (dump_enabled_p ())
5944 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5945 "can't convert between boolean and non "
5946 "boolean vectors %T\n", TREE_TYPE (op
));
5951 /* We do not handle bit-precision changes. */
5952 if ((CONVERT_EXPR_CODE_P (code
)
5953 || code
== VIEW_CONVERT_EXPR
)
5954 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5955 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5956 || (INTEGRAL_TYPE_P (TREE_TYPE (op
))
5957 && !type_has_mode_precision_p (TREE_TYPE (op
))))
5958 /* But a conversion that does not change the bit-pattern is ok. */
5959 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5960 && INTEGRAL_TYPE_P (TREE_TYPE (op
))
5961 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5962 > TYPE_PRECISION (TREE_TYPE (op
)))
5963 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5964 || (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5965 == TYPE_PRECISION (TREE_TYPE (op
))))))
5967 if (dump_enabled_p ())
5968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5969 "type conversion to/from bit-precision "
5974 if (!vec_stmt
) /* transformation not required. */
5977 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5981 "incompatible vector types for invariants\n");
5984 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5985 DUMP_VECT_SCOPE ("vectorizable_assignment");
5986 if (!vect_nop_conversion_p (stmt_info
))
5987 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5993 if (dump_enabled_p ())
5994 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5997 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6000 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
6002 /* Arguments are ready. create the new vector stmt. */
6003 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
6005 if (CONVERT_EXPR_CODE_P (code
)
6006 || code
== VIEW_CONVERT_EXPR
)
6007 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
6008 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
6009 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6010 gimple_assign_set_lhs (new_stmt
, new_temp
);
6011 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6013 slp_node
->push_vec_def (new_stmt
);
6015 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6018 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6020 vec_oprnds
.release ();
6025 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6026 either as shift by a scalar or by a vector. */
6029 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
6032 machine_mode vec_mode
;
6037 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
6041 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6043 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
6045 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6047 || (optab_handler (optab
, TYPE_MODE (vectype
))
6048 == CODE_FOR_nothing
))
6052 vec_mode
= TYPE_MODE (vectype
);
6053 icode
= (int) optab_handler (optab
, vec_mode
);
6054 if (icode
== CODE_FOR_nothing
)
6061 /* Function vectorizable_shift.
6063 Check if STMT_INFO performs a shift operation that can be vectorized.
6064 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6065 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6066 Return true if STMT_INFO is vectorizable in this way. */
6069 vectorizable_shift (vec_info
*vinfo
,
6070 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6071 gimple
**vec_stmt
, slp_tree slp_node
,
6072 stmt_vector_for_cost
*cost_vec
)
6076 tree op0
, op1
= NULL
;
6077 tree vec_oprnd1
= NULL_TREE
;
6079 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6080 enum tree_code code
;
6081 machine_mode vec_mode
;
6085 machine_mode optab_op2_mode
;
6086 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
6088 poly_uint64 nunits_in
;
6089 poly_uint64 nunits_out
;
6094 vec
<tree
> vec_oprnds0
= vNULL
;
6095 vec
<tree
> vec_oprnds1
= vNULL
;
6098 bool scalar_shift_arg
= true;
6099 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6100 bool incompatible_op1_vectype_p
= false;
6102 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6105 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6106 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
6110 /* Is STMT a vectorizable binary/unary operation? */
6111 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6115 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6118 code
= gimple_assign_rhs_code (stmt
);
6120 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6121 || code
== RROTATE_EXPR
))
6124 scalar_dest
= gimple_assign_lhs (stmt
);
6125 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6126 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6130 "bit-precision shifts not supported.\n");
6135 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6136 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6140 "use not simple.\n");
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
6146 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
6148 gcc_assert (vectype
);
6151 if (dump_enabled_p ())
6152 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6153 "no vectype for scalar type\n");
6157 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6158 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6159 if (maybe_ne (nunits_out
, nunits_in
))
6162 stmt_vec_info op1_def_stmt_info
;
6164 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
6165 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6169 "use not simple.\n");
6173 /* Multiple types in SLP are handled by creating the appropriate number of
6174 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6179 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6181 gcc_assert (ncopies
>= 1);
6183 /* Determine whether the shift amount is a vector, or scalar. If the
6184 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6186 if ((dt
[1] == vect_internal_def
6187 || dt
[1] == vect_induction_def
6188 || dt
[1] == vect_nested_cycle
)
6189 && (!slp_node
|| SLP_TREE_LANES (slp_node
) == 1))
6190 scalar_shift_arg
= false;
6191 else if (dt
[1] == vect_constant_def
6192 || dt
[1] == vect_external_def
6193 || dt
[1] == vect_internal_def
)
6195 /* In SLP, need to check whether the shift count is the same,
6196 in loops if it is a constant or invariant, it is always
6200 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
6201 stmt_vec_info slpstmt_info
;
6203 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
6206 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
6207 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
6208 scalar_shift_arg
= false;
6211 /* For internal SLP defs we have to make sure we see scalar stmts
6212 for all vector elements.
6213 ??? For different vectors we could resort to a different
6214 scalar shift operand but code-generation below simply always
6216 if (dt
[1] == vect_internal_def
6217 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
6219 scalar_shift_arg
= false;
6222 /* If the shift amount is computed by a pattern stmt we cannot
6223 use the scalar amount directly thus give up and use a vector
6225 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
6226 scalar_shift_arg
= false;
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6232 "operand mode requires invariant argument.\n");
6236 /* Vector shifted by vector. */
6237 bool was_scalar_shift_arg
= scalar_shift_arg
;
6238 if (!scalar_shift_arg
)
6240 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6241 if (dump_enabled_p ())
6242 dump_printf_loc (MSG_NOTE
, vect_location
,
6243 "vector/vector shift/rotate found.\n");
6246 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
6248 incompatible_op1_vectype_p
6249 = (op1_vectype
== NULL_TREE
6250 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
6251 TYPE_VECTOR_SUBPARTS (vectype
))
6252 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
6253 if (incompatible_op1_vectype_p
6255 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
6256 || slp_op1
->refcnt
!= 1))
6258 if (dump_enabled_p ())
6259 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6260 "unusable type for last operand in"
6261 " vector/vector shift/rotate.\n");
6265 /* See if the machine has a vector shifted by scalar insn and if not
6266 then see if it has a vector shifted by vector insn. */
6269 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6271 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_NOTE
, vect_location
,
6275 "vector/scalar shift/rotate found.\n");
6279 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6281 && (optab_handler (optab
, TYPE_MODE (vectype
))
6282 != CODE_FOR_nothing
))
6284 scalar_shift_arg
= false;
6286 if (dump_enabled_p ())
6287 dump_printf_loc (MSG_NOTE
, vect_location
,
6288 "vector/vector shift/rotate found.\n");
6291 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
6295 /* Unlike the other binary operators, shifts/rotates have
6296 the rhs being int, instead of the same type as the lhs,
6297 so make sure the scalar is the right type if we are
6298 dealing with vectors of long long/long/short/char. */
6299 incompatible_op1_vectype_p
6301 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
6303 if (incompatible_op1_vectype_p
6304 && dt
[1] == vect_internal_def
)
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6308 "unusable type for last operand in"
6309 " vector/vector shift/rotate.\n");
6316 /* Supportable by target? */
6319 if (dump_enabled_p ())
6320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6324 vec_mode
= TYPE_MODE (vectype
);
6325 icode
= (int) optab_handler (optab
, vec_mode
);
6326 if (icode
== CODE_FOR_nothing
)
6328 if (dump_enabled_p ())
6329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6330 "op not supported by target.\n");
6333 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6334 if (vect_emulated_vector_p (vectype
))
6337 if (!vec_stmt
) /* transformation not required. */
6340 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6341 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
6342 && (!incompatible_op1_vectype_p
6343 || dt
[1] == vect_constant_def
)
6344 && !vect_maybe_update_slp_op_vectype
6346 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6348 if (dump_enabled_p ())
6349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6350 "incompatible vector types for invariants\n");
6353 /* Now adjust the constant shift amount in place. */
6355 && incompatible_op1_vectype_p
6356 && dt
[1] == vect_constant_def
)
6358 for (unsigned i
= 0;
6359 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6361 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6362 = fold_convert (TREE_TYPE (vectype
),
6363 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6364 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6368 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6369 DUMP_VECT_SCOPE ("vectorizable_shift");
6370 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6371 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_NOTE
, vect_location
,
6379 "transform binary/unary operation.\n");
6381 if (incompatible_op1_vectype_p
&& !slp_node
)
6383 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6384 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6385 if (dt
[1] != vect_constant_def
)
6386 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6387 TREE_TYPE (vectype
), NULL
);
6391 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6393 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
6395 /* Vector shl and shr insn patterns can be defined with scalar
6396 operand 2 (shift operand). In this case, use constant or loop
6397 invariant op1 directly, without extending it to vector mode
6399 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6400 if (!VECTOR_MODE_P (optab_op2_mode
))
6402 if (dump_enabled_p ())
6403 dump_printf_loc (MSG_NOTE
, vect_location
,
6404 "operand 1 using scalar mode.\n");
6406 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6407 vec_oprnds1
.quick_push (vec_oprnd1
);
6408 /* Store vec_oprnd1 for every vector stmt to be created.
6409 We check during the analysis that all the shift arguments
6411 TODO: Allow different constants for different vector
6412 stmts generated for an SLP instance. */
6414 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6415 vec_oprnds1
.quick_push (vec_oprnd1
);
6418 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6420 if (was_scalar_shift_arg
)
6422 /* If the argument was the same in all lanes create
6423 the correctly typed vector shift amount directly. */
6424 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6425 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6426 !loop_vinfo
? gsi
: NULL
);
6427 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6428 !loop_vinfo
? gsi
: NULL
);
6429 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6430 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6431 vec_oprnds1
.quick_push (vec_oprnd1
);
6433 else if (dt
[1] == vect_constant_def
)
6434 /* The constant shift amount has been adjusted in place. */
6437 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6440 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6441 (a special case for certain kind of vector shifts); otherwise,
6442 operand 1 should be of a vector type (the usual case). */
6443 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6445 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6447 /* Arguments are ready. Create the new vector stmt. */
6448 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6450 /* For internal defs where we need to use a scalar shift arg
6451 extract the first lane. */
6452 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6454 vop1
= vec_oprnds1
[0];
6455 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6457 = gimple_build_assign (new_temp
,
6458 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6460 TYPE_SIZE (TREE_TYPE (new_temp
)),
6461 bitsize_zero_node
));
6462 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6466 vop1
= vec_oprnds1
[i
];
6467 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6468 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6469 gimple_assign_set_lhs (new_stmt
, new_temp
);
6470 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6472 slp_node
->push_vec_def (new_stmt
);
6474 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6478 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6480 vec_oprnds0
.release ();
6481 vec_oprnds1
.release ();
6486 /* Function vectorizable_operation.
6488 Check if STMT_INFO performs a binary, unary or ternary operation that can
6490 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6491 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6492 Return true if STMT_INFO is vectorizable in this way. */
6495 vectorizable_operation (vec_info
*vinfo
,
6496 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6497 gimple
**vec_stmt
, slp_tree slp_node
,
6498 stmt_vector_for_cost
*cost_vec
)
6502 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6504 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6505 enum tree_code code
, orig_code
;
6506 machine_mode vec_mode
;
6510 bool target_support_p
;
6511 enum vect_def_type dt
[3]
6512 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6514 poly_uint64 nunits_in
;
6515 poly_uint64 nunits_out
;
6517 int ncopies
, vec_num
;
6519 vec
<tree
> vec_oprnds0
= vNULL
;
6520 vec
<tree
> vec_oprnds1
= vNULL
;
6521 vec
<tree
> vec_oprnds2
= vNULL
;
6522 tree vop0
, vop1
, vop2
;
6523 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6525 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6528 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6532 /* Is STMT a vectorizable binary/unary operation? */
6533 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6537 /* Loads and stores are handled in vectorizable_{load,store}. */
6538 if (STMT_VINFO_DATA_REF (stmt_info
))
6541 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6543 /* Shifts are handled in vectorizable_shift. */
6544 if (code
== LSHIFT_EXPR
6545 || code
== RSHIFT_EXPR
6546 || code
== LROTATE_EXPR
6547 || code
== RROTATE_EXPR
)
6550 /* Comparisons are handled in vectorizable_comparison. */
6551 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6554 /* Conditions are handled in vectorizable_condition. */
6555 if (code
== COND_EXPR
)
6558 /* For pointer addition and subtraction, we should use the normal
6559 plus and minus for the vector operation. */
6560 if (code
== POINTER_PLUS_EXPR
)
6562 if (code
== POINTER_DIFF_EXPR
)
6565 /* Support only unary or binary operations. */
6566 op_type
= TREE_CODE_LENGTH (code
);
6567 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6569 if (dump_enabled_p ())
6570 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6571 "num. args = %d (not unary/binary/ternary op).\n",
6576 scalar_dest
= gimple_assign_lhs (stmt
);
6577 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6579 /* Most operations cannot handle bit-precision types without extra
6581 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6583 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6584 /* Exception are bitwise binary operations. */
6585 && code
!= BIT_IOR_EXPR
6586 && code
!= BIT_XOR_EXPR
6587 && code
!= BIT_AND_EXPR
)
6589 if (dump_enabled_p ())
6590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6591 "bit-precision arithmetic not supported.\n");
6596 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6597 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6599 if (dump_enabled_p ())
6600 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6601 "use not simple.\n");
6604 bool is_invariant
= (dt
[0] == vect_external_def
6605 || dt
[0] == vect_constant_def
);
6606 /* If op0 is an external or constant def, infer the vector type
6607 from the scalar type. */
6610 /* For boolean type we cannot determine vectype by
6611 invariant value (don't know whether it is a vector
6612 of booleans or vector of integers). We use output
6613 vectype because operations on boolean don't change
6615 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6617 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6619 if (dump_enabled_p ())
6620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6621 "not supported operation on bool value.\n");
6624 vectype
= vectype_out
;
6627 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6631 gcc_assert (vectype
);
6634 if (dump_enabled_p ())
6635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6636 "no vectype for scalar type %T\n",
6642 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6643 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6644 if (maybe_ne (nunits_out
, nunits_in
)
6645 || !tree_nop_conversion_p (TREE_TYPE (vectype_out
), TREE_TYPE (vectype
)))
6648 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6649 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6650 if (op_type
== binary_op
|| op_type
== ternary_op
)
6652 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6653 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6655 if (dump_enabled_p ())
6656 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6657 "use not simple.\n");
6660 is_invariant
&= (dt
[1] == vect_external_def
6661 || dt
[1] == vect_constant_def
);
6663 && (maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
))
6664 || !tree_nop_conversion_p (TREE_TYPE (vectype_out
),
6665 TREE_TYPE (vectype2
))))
6668 if (op_type
== ternary_op
)
6670 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6671 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6673 if (dump_enabled_p ())
6674 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6675 "use not simple.\n");
6678 is_invariant
&= (dt
[2] == vect_external_def
6679 || dt
[2] == vect_constant_def
);
6681 && (maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
))
6682 || !tree_nop_conversion_p (TREE_TYPE (vectype_out
),
6683 TREE_TYPE (vectype3
))))
6687 /* Multiple types in SLP are handled by creating the appropriate number of
6688 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6693 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6697 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6701 gcc_assert (ncopies
>= 1);
6703 /* Reject attempts to combine mask types with nonmask types, e.g. if
6704 we have an AND between a (nonmask) boolean loaded from memory and
6705 a (mask) boolean result of a comparison.
6707 TODO: We could easily fix these cases up using pattern statements. */
6708 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6709 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6710 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6712 if (dump_enabled_p ())
6713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6714 "mixed mask and nonmask vector types\n");
6718 /* Supportable by target? */
6720 vec_mode
= TYPE_MODE (vectype
);
6721 if (code
== MULT_HIGHPART_EXPR
)
6722 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6725 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6728 if (dump_enabled_p ())
6729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6733 target_support_p
= (optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
6734 || optab_libfunc (optab
, vec_mode
));
6737 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6738 if (!target_support_p
|| using_emulated_vectors_p
)
6740 if (dump_enabled_p ())
6741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6742 "op not supported by target.\n");
6743 /* When vec_mode is not a vector mode and we verified ops we
6744 do not have to lower like AND are natively supported let
6745 those through even when the mode isn't word_mode. For
6746 ops we have to lower the lowering code assumes we are
6747 dealing with word_mode. */
6748 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype
))
6749 || (((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6750 || !target_support_p
)
6751 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6752 /* Check only during analysis. */
6753 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6755 if (dump_enabled_p ())
6756 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6759 if (dump_enabled_p ())
6760 dump_printf_loc (MSG_NOTE
, vect_location
,
6761 "proceeding using word mode.\n");
6762 using_emulated_vectors_p
= true;
6765 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6766 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6767 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
6768 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6769 internal_fn cond_len_fn
= get_conditional_len_internal_fn (code
);
6771 /* If operating on inactive elements could generate spurious traps,
6772 we need to restrict the operation to active lanes. Note that this
6773 specifically doesn't apply to unhoisted invariants, since they
6774 operate on the same value for every lane.
6776 Similarly, if this operation is part of a reduction, a fully-masked
6777 loop should only change the active lanes of the reduction chain,
6778 keeping the inactive lanes as-is. */
6779 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6782 if (!vec_stmt
) /* transformation not required. */
6785 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6786 && mask_out_inactive
)
6788 if (cond_len_fn
!= IFN_LAST
6789 && direct_internal_fn_supported_p (cond_len_fn
, vectype
,
6790 OPTIMIZE_FOR_SPEED
))
6791 vect_record_loop_len (loop_vinfo
, lens
, ncopies
* vec_num
, vectype
,
6793 else if (cond_fn
!= IFN_LAST
6794 && direct_internal_fn_supported_p (cond_fn
, vectype
,
6795 OPTIMIZE_FOR_SPEED
))
6796 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6800 if (dump_enabled_p ())
6801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6802 "can't use a fully-masked loop because no"
6803 " conditional operation is available.\n");
6804 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6808 /* Put types on constant and invariant SLP children. */
6810 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6811 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6812 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6814 if (dump_enabled_p ())
6815 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6816 "incompatible vector types for invariants\n");
6820 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6821 DUMP_VECT_SCOPE ("vectorizable_operation");
6822 vect_model_simple_cost (vinfo
, stmt_info
,
6823 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6824 if (using_emulated_vectors_p
)
6826 /* The above vect_model_simple_cost call handles constants
6827 in the prologue and (mis-)costs one of the stmts as
6828 vector stmt. See below for the actual lowering that will
6831 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6844 /* Bit operations do not have extra cost and are accounted
6845 as vector stmt by vect_model_simple_cost. */
6851 /* We also need to materialize two large constants. */
6852 record_stmt_cost (cost_vec
, 2, scalar_stmt
, stmt_info
,
6854 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
,
6863 if (dump_enabled_p ())
6864 dump_printf_loc (MSG_NOTE
, vect_location
,
6865 "transform binary/unary operation.\n");
6867 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6868 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
6870 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6871 vectors with unsigned elements, but the result is signed. So, we
6872 need to compute the MINUS_EXPR into vectype temporary and
6873 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6874 tree vec_cvt_dest
= NULL_TREE
;
6875 if (orig_code
== POINTER_DIFF_EXPR
)
6877 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6878 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6882 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6884 /* In case the vectorization factor (VF) is bigger than the number
6885 of elements that we can fit in a vectype (nunits), we have to generate
6886 more than one vector stmt - i.e - we need to "unroll" the
6887 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6888 from one copy of the vector stmt to the next, in the field
6889 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6890 stages to find the correct vector defs to be used when vectorizing
6891 stmts that use the defs of the current stmt. The example below
6892 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6893 we need to create 4 vectorized stmts):
6895 before vectorization:
6896 RELATED_STMT VEC_STMT
6900 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6902 RELATED_STMT VEC_STMT
6903 VS1_0: vx0 = memref0 VS1_1 -
6904 VS1_1: vx1 = memref1 VS1_2 -
6905 VS1_2: vx2 = memref2 VS1_3 -
6906 VS1_3: vx3 = memref3 - -
6907 S1: x = load - VS1_0
6910 step2: vectorize stmt S2 (done here):
6911 To vectorize stmt S2 we first need to find the relevant vector
6912 def for the first operand 'x'. This is, as usual, obtained from
6913 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6914 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6915 relevant vector def 'vx0'. Having found 'vx0' we can generate
6916 the vector stmt VS2_0, and as usual, record it in the
6917 STMT_VINFO_VEC_STMT of stmt S2.
6918 When creating the second copy (VS2_1), we obtain the relevant vector
6919 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6920 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6921 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6922 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6923 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6924 chain of stmts and pointers:
6925 RELATED_STMT VEC_STMT
6926 VS1_0: vx0 = memref0 VS1_1 -
6927 VS1_1: vx1 = memref1 VS1_2 -
6928 VS1_2: vx2 = memref2 VS1_3 -
6929 VS1_3: vx3 = memref3 - -
6930 S1: x = load - VS1_0
6931 VS2_0: vz0 = vx0 + v1 VS2_1 -
6932 VS2_1: vz1 = vx1 + v1 VS2_2 -
6933 VS2_2: vz2 = vx2 + v1 VS2_3 -
6934 VS2_3: vz3 = vx3 + v1 - -
6935 S2: z = x + 1 - VS2_0 */
6937 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6938 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6939 /* Arguments are ready. Create the new vector stmt. */
6940 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6942 gimple
*new_stmt
= NULL
;
6943 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6944 ? vec_oprnds1
[i
] : NULL_TREE
);
6945 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6946 if (using_emulated_vectors_p
6947 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
6949 /* Lower the operation. This follows vector lowering. */
6950 unsigned int width
= vector_element_bits (vectype
);
6951 tree inner_type
= TREE_TYPE (vectype
);
6953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
6954 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
6955 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
6957 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
6958 tree wvop0
= make_ssa_name (word_type
);
6959 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
6960 build1 (VIEW_CONVERT_EXPR
,
6962 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6963 tree result_low
, signs
;
6964 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
6966 tree wvop1
= make_ssa_name (word_type
);
6967 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
6968 build1 (VIEW_CONVERT_EXPR
,
6970 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6971 signs
= make_ssa_name (word_type
);
6972 new_stmt
= gimple_build_assign (signs
,
6973 BIT_XOR_EXPR
, wvop0
, wvop1
);
6974 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6975 tree b_low
= make_ssa_name (word_type
);
6976 new_stmt
= gimple_build_assign (b_low
,
6977 BIT_AND_EXPR
, wvop1
, low_bits
);
6978 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6979 tree a_low
= make_ssa_name (word_type
);
6980 if (code
== PLUS_EXPR
)
6981 new_stmt
= gimple_build_assign (a_low
,
6982 BIT_AND_EXPR
, wvop0
, low_bits
);
6984 new_stmt
= gimple_build_assign (a_low
,
6985 BIT_IOR_EXPR
, wvop0
, high_bits
);
6986 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6987 if (code
== MINUS_EXPR
)
6989 new_stmt
= gimple_build_assign (NULL_TREE
,
6990 BIT_NOT_EXPR
, signs
);
6991 signs
= make_ssa_name (word_type
);
6992 gimple_assign_set_lhs (new_stmt
, signs
);
6993 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6995 new_stmt
= gimple_build_assign (NULL_TREE
,
6996 BIT_AND_EXPR
, signs
, high_bits
);
6997 signs
= make_ssa_name (word_type
);
6998 gimple_assign_set_lhs (new_stmt
, signs
);
6999 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7000 result_low
= make_ssa_name (word_type
);
7001 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
7002 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7006 tree a_low
= make_ssa_name (word_type
);
7007 new_stmt
= gimple_build_assign (a_low
,
7008 BIT_AND_EXPR
, wvop0
, low_bits
);
7009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7010 signs
= make_ssa_name (word_type
);
7011 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
7012 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7013 new_stmt
= gimple_build_assign (NULL_TREE
,
7014 BIT_AND_EXPR
, signs
, high_bits
);
7015 signs
= make_ssa_name (word_type
);
7016 gimple_assign_set_lhs (new_stmt
, signs
);
7017 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7018 result_low
= make_ssa_name (word_type
);
7019 new_stmt
= gimple_build_assign (result_low
,
7020 MINUS_EXPR
, high_bits
, a_low
);
7021 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7023 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
7025 result_low
= make_ssa_name (word_type
);
7026 gimple_assign_set_lhs (new_stmt
, result_low
);
7027 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7028 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
7029 build1 (VIEW_CONVERT_EXPR
,
7030 vectype
, result_low
));
7031 new_temp
= make_ssa_name (vectype
);
7032 gimple_assign_set_lhs (new_stmt
, new_temp
);
7033 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7035 else if ((masked_loop_p
|| len_loop_p
) && mask_out_inactive
)
7039 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7040 vec_num
* ncopies
, vectype
, i
);
7043 mask
= build_minus_one_cst (truth_type_for (vectype
));
7044 auto_vec
<tree
> vops (6);
7045 vops
.quick_push (mask
);
7046 vops
.quick_push (vop0
);
7048 vops
.quick_push (vop1
);
7050 vops
.quick_push (vop2
);
7053 /* Perform the operation on active elements only and take
7054 inactive elements from the reduction chain input. */
7056 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
7060 auto else_value
= targetm
.preferred_else_value
7061 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
7062 vops
.quick_push (else_value
);
7066 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
7067 vec_num
* ncopies
, vectype
, i
, 1);
7069 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
7070 tree bias
= build_int_cst (intQI_type_node
, biasval
);
7071 vops
.quick_push (len
);
7072 vops
.quick_push (bias
);
7075 = gimple_build_call_internal_vec (masked_loop_p
? cond_fn
7078 new_temp
= make_ssa_name (vec_dest
, call
);
7079 gimple_call_set_lhs (call
, new_temp
);
7080 gimple_call_set_nothrow (call
, true);
7081 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7086 tree mask
= NULL_TREE
;
7087 /* When combining two masks check if either of them is elsewhere
7088 combined with a loop mask, if that's the case we can mark that the
7089 new combined mask doesn't need to be combined with a loop mask. */
7091 && code
== BIT_AND_EXPR
7092 && VECTOR_BOOLEAN_TYPE_P (vectype
))
7094 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
7097 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7098 vec_num
* ncopies
, vectype
, i
);
7100 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7104 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
7107 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7108 vec_num
* ncopies
, vectype
, i
);
7110 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7115 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
7116 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7117 gimple_assign_set_lhs (new_stmt
, new_temp
);
7118 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7119 if (using_emulated_vectors_p
)
7120 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
7122 /* Enter the combined value into the vector cond hash so we don't
7123 AND it with a loop mask again. */
7125 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
7130 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
7131 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
7133 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
7134 gimple_assign_set_lhs (new_stmt
, new_temp
);
7135 vect_finish_stmt_generation (vinfo
, stmt_info
,
7140 slp_node
->push_vec_def (new_stmt
);
7142 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7146 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7148 vec_oprnds0
.release ();
7149 vec_oprnds1
.release ();
7150 vec_oprnds2
.release ();
7155 /* A helper function to ensure data reference DR_INFO's base alignment. */
7158 ensure_base_align (dr_vec_info
*dr_info
)
7160 /* Alignment is only analyzed for the first element of a DR group,
7161 use that to look at base alignment we need to enforce. */
7162 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
7163 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
7165 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
7167 if (dr_info
->base_misaligned
)
7169 tree base_decl
= dr_info
->base_decl
;
7171 // We should only be able to increase the alignment of a base object if
7172 // we know what its new alignment should be at compile time.
7173 unsigned HOST_WIDE_INT align_base_to
=
7174 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
7176 if (decl_in_symtab_p (base_decl
))
7177 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
7178 else if (DECL_ALIGN (base_decl
) < align_base_to
)
7180 SET_DECL_ALIGN (base_decl
, align_base_to
);
7181 DECL_USER_ALIGN (base_decl
) = 1;
7183 dr_info
->base_misaligned
= false;
7188 /* Function get_group_alias_ptr_type.
7190 Return the alias type for the group starting at FIRST_STMT_INFO. */
7193 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
7195 struct data_reference
*first_dr
, *next_dr
;
7197 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
7198 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
7199 while (next_stmt_info
)
7201 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
7202 if (get_alias_set (DR_REF (first_dr
))
7203 != get_alias_set (DR_REF (next_dr
)))
7205 if (dump_enabled_p ())
7206 dump_printf_loc (MSG_NOTE
, vect_location
,
7207 "conflicting alias set types.\n");
7208 return ptr_type_node
;
7210 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7212 return reference_alias_ptr_type (DR_REF (first_dr
));
7216 /* Function scan_operand_equal_p.
7218 Helper function for check_scan_store. Compare two references
7219 with .GOMP_SIMD_LANE bases. */
7222 scan_operand_equal_p (tree ref1
, tree ref2
)
7224 tree ref
[2] = { ref1
, ref2
};
7225 poly_int64 bitsize
[2], bitpos
[2];
7226 tree offset
[2], base
[2];
7227 for (int i
= 0; i
< 2; ++i
)
7230 int unsignedp
, reversep
, volatilep
= 0;
7231 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
7232 &offset
[i
], &mode
, &unsignedp
,
7233 &reversep
, &volatilep
);
7234 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
7236 if (TREE_CODE (base
[i
]) == MEM_REF
7237 && offset
[i
] == NULL_TREE
7238 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
7240 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
7241 if (is_gimple_assign (def_stmt
)
7242 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
7243 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
7244 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
7246 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
7248 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
7249 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
7254 if (!operand_equal_p (base
[0], base
[1], 0))
7256 if (maybe_ne (bitsize
[0], bitsize
[1]))
7258 if (offset
[0] != offset
[1])
7260 if (!offset
[0] || !offset
[1])
7262 if (!operand_equal_p (offset
[0], offset
[1], 0))
7265 for (int i
= 0; i
< 2; ++i
)
7267 step
[i
] = integer_one_node
;
7268 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7270 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7271 if (is_gimple_assign (def_stmt
)
7272 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
7273 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
7276 step
[i
] = gimple_assign_rhs2 (def_stmt
);
7277 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
7280 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
7282 step
[i
] = TREE_OPERAND (offset
[i
], 1);
7283 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
7285 tree rhs1
= NULL_TREE
;
7286 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7288 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7289 if (gimple_assign_cast_p (def_stmt
))
7290 rhs1
= gimple_assign_rhs1 (def_stmt
);
7292 else if (CONVERT_EXPR_P (offset
[i
]))
7293 rhs1
= TREE_OPERAND (offset
[i
], 0);
7295 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
7296 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
7297 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
7298 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
7301 if (!operand_equal_p (offset
[0], offset
[1], 0)
7302 || !operand_equal_p (step
[0], step
[1], 0))
7310 enum scan_store_kind
{
7311 /* Normal permutation. */
7312 scan_store_kind_perm
,
7314 /* Whole vector left shift permutation with zero init. */
7315 scan_store_kind_lshift_zero
,
7317 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7318 scan_store_kind_lshift_cond
7321 /* Function check_scan_store.
7323 Verify if we can perform the needed permutations or whole vector shifts.
7324 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7325 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7326 to do at each step. */
7329 scan_store_can_perm_p (tree vectype
, tree init
,
7330 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
7332 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7333 unsigned HOST_WIDE_INT nunits
;
7334 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7336 int units_log2
= exact_log2 (nunits
);
7337 if (units_log2
<= 0)
7341 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
7342 for (i
= 0; i
<= units_log2
; ++i
)
7344 unsigned HOST_WIDE_INT j
, k
;
7345 enum scan_store_kind kind
= scan_store_kind_perm
;
7346 vec_perm_builder
sel (nunits
, nunits
, 1);
7347 sel
.quick_grow (nunits
);
7348 if (i
== units_log2
)
7350 for (j
= 0; j
< nunits
; ++j
)
7351 sel
[j
] = nunits
- 1;
7355 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7357 for (k
= 0; j
< nunits
; ++j
, ++k
)
7358 sel
[j
] = nunits
+ k
;
7360 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7361 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
7363 if (i
== units_log2
)
7366 if (whole_vector_shift_kind
== scan_store_kind_perm
)
7368 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
7370 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
7371 /* Whole vector shifts shift in zeros, so if init is all zero
7372 constant, there is no need to do anything further. */
7373 if ((TREE_CODE (init
) != INTEGER_CST
7374 && TREE_CODE (init
) != REAL_CST
)
7375 || !initializer_zerop (init
))
7377 tree masktype
= truth_type_for (vectype
);
7378 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
7380 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
7383 kind
= whole_vector_shift_kind
;
7385 if (use_whole_vector
)
7387 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
7388 use_whole_vector
->safe_grow_cleared (i
, true);
7389 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7390 use_whole_vector
->safe_push (kind
);
7398 /* Function check_scan_store.
7400 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7403 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7404 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7405 vect_memory_access_type memory_access_type
)
7407 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7408 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7411 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7414 || memory_access_type
!= VMAT_CONTIGUOUS
7415 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7416 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7417 || loop_vinfo
== NULL
7418 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7419 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7420 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7421 || !integer_zerop (DR_INIT (dr_info
->dr
))
7422 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7423 || !alias_sets_conflict_p (get_alias_set (vectype
),
7424 get_alias_set (TREE_TYPE (ref_type
))))
7426 if (dump_enabled_p ())
7427 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7428 "unsupported OpenMP scan store.\n");
7432 /* We need to pattern match code built by OpenMP lowering and simplified
7433 by following optimizations into something we can handle.
7434 #pragma omp simd reduction(inscan,+:r)
7438 #pragma omp scan inclusive (r)
7441 shall have body with:
7442 // Initialization for input phase, store the reduction initializer:
7443 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7444 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7446 // Actual input phase:
7448 r.0_5 = D.2042[_20];
7451 // Initialization for scan phase:
7452 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7458 // Actual scan phase:
7460 r.1_8 = D.2042[_20];
7462 The "omp simd array" variable D.2042 holds the privatized copy used
7463 inside of the loop and D.2043 is another one that holds copies of
7464 the current original list item. The separate GOMP_SIMD_LANE ifn
7465 kinds are there in order to allow optimizing the initializer store
7466 and combiner sequence, e.g. if it is originally some C++ish user
7467 defined reduction, but allow the vectorizer to pattern recognize it
7468 and turn into the appropriate vectorized scan.
7470 For exclusive scan, this is slightly different:
7471 #pragma omp simd reduction(inscan,+:r)
7475 #pragma omp scan exclusive (r)
7478 shall have body with:
7479 // Initialization for input phase, store the reduction initializer:
7480 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7481 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7483 // Actual input phase:
7485 r.0_5 = D.2042[_20];
7488 // Initialization for scan phase:
7489 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7495 // Actual scan phase:
7497 r.1_8 = D.2044[_20];
7500 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7502 /* Match the D.2042[_21] = 0; store above. Just require that
7503 it is a constant or external definition store. */
7504 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7507 if (dump_enabled_p ())
7508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7509 "unsupported OpenMP scan initializer store.\n");
7513 if (! loop_vinfo
->scan_map
)
7514 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7515 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7516 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7519 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7521 /* These stores can be vectorized normally. */
7525 if (rhs_dt
!= vect_internal_def
)
7528 if (dump_enabled_p ())
7529 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7530 "unsupported OpenMP scan combiner pattern.\n");
7534 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7535 tree rhs
= gimple_assign_rhs1 (stmt
);
7536 if (TREE_CODE (rhs
) != SSA_NAME
)
7539 gimple
*other_store_stmt
= NULL
;
7540 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7541 bool inscan_var_store
7542 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7544 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7546 if (!inscan_var_store
)
7548 use_operand_p use_p
;
7549 imm_use_iterator iter
;
7550 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7552 gimple
*use_stmt
= USE_STMT (use_p
);
7553 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7555 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7556 || !is_gimple_assign (use_stmt
)
7557 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7559 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7561 other_store_stmt
= use_stmt
;
7563 if (other_store_stmt
== NULL
)
7565 rhs
= gimple_assign_lhs (other_store_stmt
);
7566 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7570 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7572 use_operand_p use_p
;
7573 imm_use_iterator iter
;
7574 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7576 gimple
*use_stmt
= USE_STMT (use_p
);
7577 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7579 if (other_store_stmt
)
7581 other_store_stmt
= use_stmt
;
7587 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7588 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
7589 || !is_gimple_assign (def_stmt
)
7590 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
7593 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7594 /* For pointer addition, we should use the normal plus for the vector
7598 case POINTER_PLUS_EXPR
:
7601 case MULT_HIGHPART_EXPR
:
7606 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
7609 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7610 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7611 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
7614 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7615 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7616 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7617 || !gimple_assign_load_p (load1_stmt
)
7618 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7619 || !gimple_assign_load_p (load2_stmt
))
7622 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7623 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7624 if (load1_stmt_info
== NULL
7625 || load2_stmt_info
== NULL
7626 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7627 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7628 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7629 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7632 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7634 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7635 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7636 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7638 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7640 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7644 use_operand_p use_p
;
7645 imm_use_iterator iter
;
7646 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7648 gimple
*use_stmt
= USE_STMT (use_p
);
7649 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7651 if (other_store_stmt
)
7653 other_store_stmt
= use_stmt
;
7657 if (other_store_stmt
== NULL
)
7659 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7660 || !gimple_store_p (other_store_stmt
))
7663 stmt_vec_info other_store_stmt_info
7664 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7665 if (other_store_stmt_info
== NULL
7666 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7667 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7670 gimple
*stmt1
= stmt
;
7671 gimple
*stmt2
= other_store_stmt
;
7672 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7673 std::swap (stmt1
, stmt2
);
7674 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7675 gimple_assign_rhs1 (load2_stmt
)))
7677 std::swap (rhs1
, rhs2
);
7678 std::swap (load1_stmt
, load2_stmt
);
7679 std::swap (load1_stmt_info
, load2_stmt_info
);
7681 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7682 gimple_assign_rhs1 (load1_stmt
)))
7685 tree var3
= NULL_TREE
;
7686 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7687 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7688 gimple_assign_rhs1 (load2_stmt
)))
7690 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7692 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7693 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7694 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7696 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7697 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7698 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7699 || lookup_attribute ("omp simd inscan exclusive",
7700 DECL_ATTRIBUTES (var3
)))
7704 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7705 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7706 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7709 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7710 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7711 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7712 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7713 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7714 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7717 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7718 std::swap (var1
, var2
);
7720 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7722 if (!lookup_attribute ("omp simd inscan exclusive",
7723 DECL_ATTRIBUTES (var1
)))
7728 if (loop_vinfo
->scan_map
== NULL
)
7730 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7734 /* The IL is as expected, now check if we can actually vectorize it.
7741 should be vectorized as (where _40 is the vectorized rhs
7742 from the D.2042[_21] = 0; store):
7743 _30 = MEM <vector(8) int> [(int *)&D.2043];
7744 _31 = MEM <vector(8) int> [(int *)&D.2042];
7745 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7747 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7748 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7750 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7751 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7752 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7754 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7755 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7757 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7758 MEM <vector(8) int> [(int *)&D.2043] = _39;
7759 MEM <vector(8) int> [(int *)&D.2042] = _38;
7766 should be vectorized as (where _40 is the vectorized rhs
7767 from the D.2042[_21] = 0; store):
7768 _30 = MEM <vector(8) int> [(int *)&D.2043];
7769 _31 = MEM <vector(8) int> [(int *)&D.2042];
7770 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7771 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7773 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7774 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7775 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7777 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7778 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7779 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7781 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7782 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7785 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7786 MEM <vector(8) int> [(int *)&D.2044] = _39;
7787 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7788 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7789 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7790 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7793 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7794 if (units_log2
== -1)
7801 /* Function vectorizable_scan_store.
7803 Helper of vectorizable_store, arguments like on vectorizable_store.
7804 Handle only the transformation, checking is done in check_scan_store. */
7807 vectorizable_scan_store (vec_info
*vinfo
,
7808 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7809 gimple
**vec_stmt
, int ncopies
)
7811 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7812 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7813 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7814 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7816 if (dump_enabled_p ())
7817 dump_printf_loc (MSG_NOTE
, vect_location
,
7818 "transform scan store. ncopies = %d\n", ncopies
);
7820 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7821 tree rhs
= gimple_assign_rhs1 (stmt
);
7822 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7824 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7825 bool inscan_var_store
7826 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7828 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7830 use_operand_p use_p
;
7831 imm_use_iterator iter
;
7832 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7834 gimple
*use_stmt
= USE_STMT (use_p
);
7835 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7837 rhs
= gimple_assign_lhs (use_stmt
);
7842 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7843 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7844 if (code
== POINTER_PLUS_EXPR
)
7846 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7847 && commutative_tree_code (code
));
7848 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7849 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7850 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7851 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7852 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7853 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7854 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7855 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7856 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7857 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7858 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7860 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7862 std::swap (rhs1
, rhs2
);
7863 std::swap (var1
, var2
);
7864 std::swap (load1_dr_info
, load2_dr_info
);
7867 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7870 unsigned HOST_WIDE_INT nunits
;
7871 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7873 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7874 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7875 gcc_assert (units_log2
> 0);
7876 auto_vec
<tree
, 16> perms
;
7877 perms
.quick_grow (units_log2
+ 1);
7878 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7879 for (int i
= 0; i
<= units_log2
; ++i
)
7881 unsigned HOST_WIDE_INT j
, k
;
7882 vec_perm_builder
sel (nunits
, nunits
, 1);
7883 sel
.quick_grow (nunits
);
7884 if (i
== units_log2
)
7885 for (j
= 0; j
< nunits
; ++j
)
7886 sel
[j
] = nunits
- 1;
7889 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7891 for (k
= 0; j
< nunits
; ++j
, ++k
)
7892 sel
[j
] = nunits
+ k
;
7894 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7895 if (!use_whole_vector
.is_empty ()
7896 && use_whole_vector
[i
] != scan_store_kind_perm
)
7898 if (zero_vec
== NULL_TREE
)
7899 zero_vec
= build_zero_cst (vectype
);
7900 if (masktype
== NULL_TREE
7901 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7902 masktype
= truth_type_for (vectype
);
7903 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7906 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7909 tree vec_oprnd1
= NULL_TREE
;
7910 tree vec_oprnd2
= NULL_TREE
;
7911 tree vec_oprnd3
= NULL_TREE
;
7912 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7913 tree dataref_offset
= build_int_cst (ref_type
, 0);
7914 tree bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
,
7915 vectype
, VMAT_CONTIGUOUS
);
7916 tree ldataref_ptr
= NULL_TREE
;
7917 tree orig
= NULL_TREE
;
7918 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7919 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7920 auto_vec
<tree
> vec_oprnds1
;
7921 auto_vec
<tree
> vec_oprnds2
;
7922 auto_vec
<tree
> vec_oprnds3
;
7923 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7924 *init
, &vec_oprnds1
,
7925 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7926 rhs2
, &vec_oprnds3
);
7927 for (int j
= 0; j
< ncopies
; j
++)
7929 vec_oprnd1
= vec_oprnds1
[j
];
7930 if (ldataref_ptr
== NULL
)
7931 vec_oprnd2
= vec_oprnds2
[j
];
7932 vec_oprnd3
= vec_oprnds3
[j
];
7935 else if (!inscan_var_store
)
7936 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7940 vec_oprnd2
= make_ssa_name (vectype
);
7941 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7942 unshare_expr (ldataref_ptr
),
7944 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7945 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7946 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7947 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7948 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7951 tree v
= vec_oprnd2
;
7952 for (int i
= 0; i
< units_log2
; ++i
)
7954 tree new_temp
= make_ssa_name (vectype
);
7955 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7957 && (use_whole_vector
[i
]
7958 != scan_store_kind_perm
))
7959 ? zero_vec
: vec_oprnd1
, v
,
7961 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7962 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7963 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7965 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7967 /* Whole vector shift shifted in zero bits, but if *init
7968 is not initializer_zerop, we need to replace those elements
7969 with elements from vec_oprnd1. */
7970 tree_vector_builder
vb (masktype
, nunits
, 1);
7971 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7972 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7973 ? boolean_false_node
: boolean_true_node
);
7975 tree new_temp2
= make_ssa_name (vectype
);
7976 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7977 new_temp
, vec_oprnd1
);
7978 vect_finish_stmt_generation (vinfo
, stmt_info
,
7980 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7981 new_temp
= new_temp2
;
7984 /* For exclusive scan, perform the perms[i] permutation once
7987 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7995 tree new_temp2
= make_ssa_name (vectype
);
7996 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7997 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7998 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8003 tree new_temp
= make_ssa_name (vectype
);
8004 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
8005 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8006 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8008 tree last_perm_arg
= new_temp
;
8009 /* For exclusive scan, new_temp computed above is the exclusive scan
8010 prefix sum. Turn it into inclusive prefix sum for the broadcast
8011 of the last element into orig. */
8012 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
8014 last_perm_arg
= make_ssa_name (vectype
);
8015 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
8016 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8017 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8020 orig
= make_ssa_name (vectype
);
8021 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
8022 last_perm_arg
, perms
[units_log2
]);
8023 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8024 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8026 if (!inscan_var_store
)
8028 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8029 unshare_expr (dataref_ptr
),
8031 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8032 g
= gimple_build_assign (data_ref
, new_temp
);
8033 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8034 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8038 if (inscan_var_store
)
8039 for (int j
= 0; j
< ncopies
; j
++)
8042 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8044 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8045 unshare_expr (dataref_ptr
),
8047 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8048 gimple
*g
= gimple_build_assign (data_ref
, orig
);
8049 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8050 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8056 /* Function vectorizable_store.
8058 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8059 that can be vectorized.
8060 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8061 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8062 Return true if STMT_INFO is vectorizable in this way. */
8065 vectorizable_store (vec_info
*vinfo
,
8066 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8067 gimple
**vec_stmt
, slp_tree slp_node
,
8068 stmt_vector_for_cost
*cost_vec
)
8071 tree vec_oprnd
= NULL_TREE
;
8073 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8074 class loop
*loop
= NULL
;
8075 machine_mode vec_mode
;
8077 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
8078 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8079 tree dataref_ptr
= NULL_TREE
;
8080 tree dataref_offset
= NULL_TREE
;
8081 gimple
*ptr_incr
= NULL
;
8084 stmt_vec_info first_stmt_info
;
8086 unsigned int group_size
, i
;
8087 bool slp
= (slp_node
!= NULL
);
8088 unsigned int vec_num
;
8089 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8091 gather_scatter_info gs_info
;
8093 vec_load_store_type vls_type
;
8096 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8099 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8103 /* Is vectorizable store? */
8105 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8106 slp_tree mask_node
= NULL
;
8107 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8109 tree scalar_dest
= gimple_assign_lhs (assign
);
8110 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8111 && is_pattern_stmt_p (stmt_info
))
8112 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8113 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8114 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8115 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8116 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8117 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8118 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8119 && TREE_CODE (scalar_dest
) != MEM_REF
)
8124 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8125 if (!call
|| !gimple_call_internal_p (call
))
8128 internal_fn ifn
= gimple_call_internal_fn (call
);
8129 if (!internal_store_fn_p (ifn
))
8132 int mask_index
= internal_fn_mask_index (ifn
);
8133 if (mask_index
>= 0 && slp_node
)
8134 mask_index
= vect_slp_child_index_for_operand
8135 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8137 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8138 &mask
, &mask_node
, &mask_dt
,
8143 /* Cannot have hybrid store SLP -- that would mean storing to the
8144 same location twice. */
8145 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8147 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8148 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8152 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8153 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8158 /* Multiple types in SLP are handled by creating the appropriate number of
8159 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8164 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8166 gcc_assert (ncopies
>= 1);
8168 /* FORNOW. This restriction should be relaxed. */
8170 && nested_in_vect_loop_p (loop
, stmt_info
)
8171 && (ncopies
> 1 || (slp
&& SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) > 1)))
8173 if (dump_enabled_p ())
8174 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8175 "multiple types in nested loop.\n");
8181 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8182 &op
, &op_node
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8185 elem_type
= TREE_TYPE (vectype
);
8186 vec_mode
= TYPE_MODE (vectype
);
8188 if (!STMT_VINFO_DATA_REF (stmt_info
))
8191 vect_memory_access_type memory_access_type
;
8192 enum dr_alignment_support alignment_support_scheme
;
8195 internal_fn lanes_ifn
;
8196 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8197 ncopies
, &memory_access_type
, &poffset
,
8198 &alignment_support_scheme
, &misalignment
, &gs_info
,
8204 if (memory_access_type
== VMAT_CONTIGUOUS
)
8206 if (!VECTOR_MODE_P (vec_mode
)
8207 || !can_vec_mask_load_store_p (vec_mode
,
8208 TYPE_MODE (mask_vectype
), false))
8211 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8212 && (memory_access_type
!= VMAT_GATHER_SCATTER
8213 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8215 if (dump_enabled_p ())
8216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8217 "unsupported access type for masked store.\n");
8220 else if (memory_access_type
== VMAT_GATHER_SCATTER
8221 && gs_info
.ifn
== IFN_LAST
8224 if (dump_enabled_p ())
8225 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8226 "unsupported masked emulated scatter.\n");
8232 /* FORNOW. In some cases can vectorize even if data-type not supported
8233 (e.g. - array initialization with 0). */
8234 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8238 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8239 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8240 && memory_access_type
!= VMAT_GATHER_SCATTER
8241 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8244 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8245 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8246 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8250 first_stmt_info
= stmt_info
;
8251 first_dr_info
= dr_info
;
8252 group_size
= vec_num
= 1;
8255 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8257 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8258 memory_access_type
))
8262 bool costing_p
= !vec_stmt
;
8263 if (costing_p
) /* transformation not required. */
8265 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8268 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8269 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8270 vls_type
, group_size
,
8271 memory_access_type
, &gs_info
,
8275 && (!vect_maybe_update_slp_op_vectype (op_node
, vectype
)
8277 && !vect_maybe_update_slp_op_vectype (mask_node
,
8280 if (dump_enabled_p ())
8281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8282 "incompatible vector types for invariants\n");
8286 if (dump_enabled_p ()
8287 && memory_access_type
!= VMAT_ELEMENTWISE
8288 && memory_access_type
!= VMAT_GATHER_SCATTER
8289 && alignment_support_scheme
!= dr_aligned
)
8290 dump_printf_loc (MSG_NOTE
, vect_location
,
8291 "Vectorizing an unaligned access.\n");
8293 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
8295 /* As function vect_transform_stmt shows, for interleaving stores
8296 the whole chain is vectorized when the last store in the chain
8297 is reached, the other stores in the group are skipped. So we
8298 want to only cost the last one here, but it's not trivial to
8299 get the last, as it's equivalent to use the first one for
8300 costing, use the first one instead. */
8303 && first_stmt_info
!= stmt_info
)
8306 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8310 ensure_base_align (dr_info
);
8312 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8314 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
);
8318 unsigned int inside_cost
= 0, prologue_cost
= 0;
8319 if (vls_type
== VLS_STORE_INVARIANT
)
8320 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8321 stmt_info
, 0, vect_prologue
);
8322 vect_get_store_cost (vinfo
, stmt_info
, ncopies
,
8323 alignment_support_scheme
, misalignment
,
8324 &inside_cost
, cost_vec
);
8326 if (dump_enabled_p ())
8327 dump_printf_loc (MSG_NOTE
, vect_location
,
8328 "vect_model_store_cost: inside_cost = %d, "
8329 "prologue_cost = %d .\n",
8330 inside_cost
, prologue_cost
);
8334 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8340 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8344 grouped_store
= false;
8345 /* VEC_NUM is the number of vect stmts to be created for this
8347 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8348 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8349 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8350 == first_stmt_info
);
8351 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8352 op
= vect_get_store_rhs (first_stmt_info
);
8355 /* VEC_NUM is the number of vect stmts to be created for this
8357 vec_num
= group_size
;
8359 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8362 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8364 if (!costing_p
&& dump_enabled_p ())
8365 dump_printf_loc (MSG_NOTE
, vect_location
, "transform store. ncopies = %d\n",
8368 /* Check if we need to update prologue cost for invariant,
8369 and update it accordingly if so. If it's not for
8370 interleaving store, we can just check vls_type; but if
8371 it's for interleaving store, need to check the def_type
8372 of the stored value since the current vls_type is just
8373 for first_stmt_info. */
8374 auto update_prologue_cost
= [&](unsigned *prologue_cost
, tree store_rhs
)
8376 gcc_assert (costing_p
);
8381 gcc_assert (store_rhs
);
8382 enum vect_def_type cdt
;
8383 gcc_assert (vect_is_simple_use (store_rhs
, vinfo
, &cdt
));
8384 if (cdt
!= vect_constant_def
&& cdt
!= vect_external_def
)
8387 else if (vls_type
!= VLS_STORE_INVARIANT
)
8389 *prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
,
8393 if (memory_access_type
== VMAT_ELEMENTWISE
8394 || memory_access_type
== VMAT_STRIDED_SLP
)
8396 unsigned inside_cost
= 0, prologue_cost
= 0;
8397 gimple_stmt_iterator incr_gsi
;
8403 tree stride_base
, stride_step
, alias_off
;
8404 tree vec_oprnd
= NULL_TREE
;
8407 /* Checked by get_load_store_type. */
8408 unsigned int const_nunits
= nunits
.to_constant ();
8410 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8411 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8413 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8415 = fold_build_pointer_plus
8416 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8417 size_binop (PLUS_EXPR
,
8418 convert_to_ptrofftype (dr_offset
),
8419 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8420 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8422 /* For a store with loop-invariant (but other than power-of-2)
8423 stride (i.e. not a grouped access) like so:
8425 for (i = 0; i < n; i += stride)
8428 we generate a new induction variable and new stores from
8429 the components of the (vectorized) rhs:
8431 for (j = 0; ; j += VF*stride)
8436 array[j + stride] = tmp2;
8440 unsigned nstores
= const_nunits
;
8442 tree ltype
= elem_type
;
8443 tree lvectype
= vectype
;
8446 HOST_WIDE_INT n
= gcd (group_size
, const_nunits
);
8447 if (n
== const_nunits
)
8449 int mis_align
= dr_misalignment (first_dr_info
, vectype
);
8450 dr_alignment_support dr_align
8451 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
,
8453 if (dr_align
== dr_aligned
8454 || dr_align
== dr_unaligned_supported
)
8457 lnel
= const_nunits
;
8460 alignment_support_scheme
= dr_align
;
8461 misalignment
= mis_align
;
8466 nstores
= const_nunits
/ n
;
8468 ltype
= build_vector_type (elem_type
, n
);
8471 /* First check if vec_extract optab doesn't support extraction
8472 of vector elts directly. */
8473 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8475 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8476 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8478 || (convert_optab_handler (vec_extract_optab
,
8479 TYPE_MODE (vectype
), vmode
)
8480 == CODE_FOR_nothing
))
8482 /* Try to avoid emitting an extract of vector elements
8483 by performing the extracts using an integer type of the
8484 same size, extracting from a vector of those and then
8485 re-interpreting it as the original vector type if
8488 = n
* GET_MODE_BITSIZE (elmode
);
8489 unsigned int lnunits
= const_nunits
/ n
;
8490 /* If we can't construct such a vector fall back to
8491 element extracts from the original vector type and
8492 element size stores. */
8493 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8494 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8495 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8496 lnunits
).exists (&vmode
)
8497 && (convert_optab_handler (vec_extract_optab
,
8499 != CODE_FOR_nothing
))
8503 ltype
= build_nonstandard_integer_type (lsize
, 1);
8504 lvectype
= build_vector_type (ltype
, nstores
);
8506 /* Else fall back to vector extraction anyway.
8507 Fewer stores are more important than avoiding spilling
8508 of the vector we extract from. Compared to the
8509 construction case in vectorizable_load no store-forwarding
8510 issue exists here for reasonable archs. */
8513 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8514 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8519 ivstep
= stride_step
;
8520 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8521 build_int_cst (TREE_TYPE (ivstep
), vf
));
8523 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8525 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8526 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8527 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
, loop
, &incr_gsi
,
8528 insert_after
, &offvar
, NULL
);
8529 incr
= gsi_stmt (incr_gsi
);
8531 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8534 alias_off
= build_int_cst (ref_type
, 0);
8535 stmt_vec_info next_stmt_info
= first_stmt_info
;
8536 auto_vec
<tree
> vec_oprnds
;
8537 /* For costing some adjacent vector stores, we'd like to cost with
8538 the total number of them once instead of cost each one by one. */
8539 unsigned int n_adjacent_stores
= 0;
8540 for (g
= 0; g
< group_size
; g
++)
8542 running_off
= offvar
;
8547 tree size
= TYPE_SIZE_UNIT (ltype
);
8549 = fold_build2 (MULT_EXPR
, sizetype
, size_int (g
), size
);
8550 tree newoff
= copy_ssa_name (running_off
, NULL
);
8551 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8553 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8554 running_off
= newoff
;
8558 op
= vect_get_store_rhs (next_stmt_info
);
8560 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
, op
,
8563 update_prologue_cost (&prologue_cost
, op
);
8564 unsigned int group_el
= 0;
8565 unsigned HOST_WIDE_INT
8566 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8567 for (j
= 0; j
< ncopies
; j
++)
8571 vec_oprnd
= vec_oprnds
[j
];
8572 /* Pun the vector to extract from if necessary. */
8573 if (lvectype
!= vectype
)
8575 tree tem
= make_ssa_name (lvectype
);
8577 = build1 (VIEW_CONVERT_EXPR
, lvectype
, vec_oprnd
);
8578 gimple
*pun
= gimple_build_assign (tem
, cvt
);
8579 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8583 for (i
= 0; i
< nstores
; i
++)
8587 /* Only need vector extracting when there are more
8591 += record_stmt_cost (cost_vec
, 1, vec_to_scalar
,
8592 stmt_info
, 0, vect_body
);
8593 /* Take a single lane vector type store as scalar
8594 store to avoid ICE like 110776. */
8595 if (VECTOR_TYPE_P (ltype
)
8596 && known_ne (TYPE_VECTOR_SUBPARTS (ltype
), 1U))
8597 n_adjacent_stores
++;
8600 += record_stmt_cost (cost_vec
, 1, scalar_store
,
8601 stmt_info
, 0, vect_body
);
8604 tree newref
, newoff
;
8605 gimple
*incr
, *assign
;
8606 tree size
= TYPE_SIZE (ltype
);
8607 /* Extract the i'th component. */
8608 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8609 bitsize_int (i
), size
);
8610 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8613 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8617 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8619 newref
= build2 (MEM_REF
, ltype
,
8620 running_off
, this_off
);
8621 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8623 /* And store it to *running_off. */
8624 assign
= gimple_build_assign (newref
, elem
);
8625 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8629 || group_el
== group_size
)
8631 newoff
= copy_ssa_name (running_off
, NULL
);
8632 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8633 running_off
, stride_step
);
8634 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8636 running_off
= newoff
;
8639 if (g
== group_size
- 1
8642 if (j
== 0 && i
== 0)
8644 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8648 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8649 vec_oprnds
.truncate(0);
8656 if (n_adjacent_stores
> 0)
8657 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8658 alignment_support_scheme
, misalignment
,
8659 &inside_cost
, cost_vec
);
8660 if (dump_enabled_p ())
8661 dump_printf_loc (MSG_NOTE
, vect_location
,
8662 "vect_model_store_cost: inside_cost = %d, "
8663 "prologue_cost = %d .\n",
8664 inside_cost
, prologue_cost
);
8670 gcc_assert (alignment_support_scheme
);
8671 vec_loop_masks
*loop_masks
8672 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8673 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8675 vec_loop_lens
*loop_lens
8676 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8677 ? &LOOP_VINFO_LENS (loop_vinfo
)
8680 /* The vect_transform_stmt and vect_analyze_stmt will go here but there
8681 are some difference here. We cannot enable both the lens and masks
8682 during transform but it is allowed during analysis.
8683 Shouldn't go with length-based approach if fully masked. */
8684 if (cost_vec
== NULL
)
8685 /* The cost_vec is NULL during transfrom. */
8686 gcc_assert ((!loop_lens
|| !loop_masks
));
8688 /* Targets with store-lane instructions must not require explicit
8689 realignment. vect_supportable_dr_alignment always returns either
8690 dr_aligned or dr_unaligned_supported for masked operations. */
8691 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8694 || alignment_support_scheme
== dr_aligned
8695 || alignment_support_scheme
== dr_unaligned_supported
);
8697 tree offset
= NULL_TREE
;
8698 if (!known_eq (poffset
, 0))
8699 offset
= size_int (poffset
);
8702 tree vec_offset
= NULL_TREE
;
8703 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8705 aggr_type
= NULL_TREE
;
8708 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8710 aggr_type
= elem_type
;
8712 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8713 &bump
, &vec_offset
, loop_lens
);
8717 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8718 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8720 aggr_type
= vectype
;
8721 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8722 memory_access_type
, loop_lens
);
8725 if (mask
&& !costing_p
)
8726 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8728 /* In case the vectorization factor (VF) is bigger than the number
8729 of elements that we can fit in a vectype (nunits), we have to generate
8730 more than one vector stmt - i.e - we need to "unroll" the
8731 vector stmt by a factor VF/nunits. */
8733 /* In case of interleaving (non-unit grouped access):
8740 We create vectorized stores starting from base address (the access of the
8741 first stmt in the chain (S2 in the above example), when the last store stmt
8742 of the chain (S4) is reached:
8745 VS2: &base + vec_size*1 = vx0
8746 VS3: &base + vec_size*2 = vx1
8747 VS4: &base + vec_size*3 = vx3
8749 Then permutation statements are generated:
8751 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8752 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8755 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8756 (the order of the data-refs in the output of vect_permute_store_chain
8757 corresponds to the order of scalar stmts in the interleaving chain - see
8758 the documentation of vect_permute_store_chain()).
8760 In case of both multiple types and interleaving, above vector stores and
8761 permutation stmts are created for every copy. The result vector stmts are
8762 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8763 STMT_VINFO_RELATED_STMT for the next copies.
8766 auto_vec
<tree
> dr_chain (group_size
);
8767 auto_vec
<tree
> vec_masks
;
8768 tree vec_mask
= NULL
;
8769 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8770 for (i
= 0; i
< group_size
; i
++)
8771 gvec_oprnds
.quick_push (new auto_vec
<tree
> ());
8773 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8775 gcc_assert (!slp
&& grouped_store
);
8776 unsigned inside_cost
= 0, prologue_cost
= 0;
8777 /* For costing some adjacent vector stores, we'd like to cost with
8778 the total number of them once instead of cost each one by one. */
8779 unsigned int n_adjacent_stores
= 0;
8780 for (j
= 0; j
< ncopies
; j
++)
8785 /* For interleaved stores we collect vectorized defs for all
8786 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8787 as an input to vect_permute_store_chain(). */
8788 stmt_vec_info next_stmt_info
= first_stmt_info
;
8789 for (i
= 0; i
< group_size
; i
++)
8791 /* Since gaps are not supported for interleaved stores,
8792 DR_GROUP_SIZE is the exact number of stmts in the
8793 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8794 op
= vect_get_store_rhs (next_stmt_info
);
8796 update_prologue_cost (&prologue_cost
, op
);
8799 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8802 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8803 dr_chain
.quick_push (vec_oprnd
);
8805 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8812 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8815 vec_mask
= vec_masks
[0];
8818 /* We should have catched mismatched types earlier. */
8820 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
8822 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8823 aggr_type
, NULL
, offset
, &dummy
,
8824 gsi
, &ptr_incr
, false, bump
);
8827 else if (!costing_p
)
8829 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8830 /* DR_CHAIN is then used as an input to
8831 vect_permute_store_chain(). */
8832 for (i
= 0; i
< group_size
; i
++)
8834 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8835 dr_chain
[i
] = vec_oprnd
;
8838 vec_mask
= vec_masks
[j
];
8839 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8845 n_adjacent_stores
+= vec_num
;
8849 /* Get an array into which we can store the individual vectors. */
8850 tree vec_array
= create_vector_array (vectype
, vec_num
);
8852 /* Invalidate the current contents of VEC_ARRAY. This should
8853 become an RTL clobber too, which prevents the vector registers
8854 from being upward-exposed. */
8855 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8857 /* Store the individual vectors into the array. */
8858 for (i
= 0; i
< vec_num
; i
++)
8860 vec_oprnd
= dr_chain
[i
];
8861 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8865 tree final_mask
= NULL
;
8866 tree final_len
= NULL
;
8869 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8870 ncopies
, vectype
, j
);
8872 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8875 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8878 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8879 ncopies
, vectype
, j
, 1);
8881 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8883 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8884 bias
= build_int_cst (intQI_type_node
, biasval
);
8887 mask_vectype
= truth_type_for (vectype
);
8888 final_mask
= build_minus_one_cst (mask_vectype
);
8893 if (final_len
&& final_mask
)
8896 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8897 LEN, BIAS, VEC_ARRAY). */
8898 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8899 tree alias_ptr
= build_int_cst (ref_type
, align
);
8900 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
8901 dataref_ptr
, alias_ptr
,
8902 final_mask
, final_len
, bias
,
8905 else if (final_mask
)
8908 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8910 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8911 tree alias_ptr
= build_int_cst (ref_type
, align
);
8912 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8913 dataref_ptr
, alias_ptr
,
8914 final_mask
, vec_array
);
8919 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8920 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8921 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
8922 gimple_call_set_lhs (call
, data_ref
);
8924 gimple_call_set_nothrow (call
, true);
8925 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8928 /* Record that VEC_ARRAY is now dead. */
8929 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8931 *vec_stmt
= new_stmt
;
8932 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8937 if (n_adjacent_stores
> 0)
8938 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8939 alignment_support_scheme
, misalignment
,
8940 &inside_cost
, cost_vec
);
8941 if (dump_enabled_p ())
8942 dump_printf_loc (MSG_NOTE
, vect_location
,
8943 "vect_model_store_cost: inside_cost = %d, "
8944 "prologue_cost = %d .\n",
8945 inside_cost
, prologue_cost
);
8951 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8953 gcc_assert (!grouped_store
);
8954 auto_vec
<tree
> vec_offsets
;
8955 unsigned int inside_cost
= 0, prologue_cost
= 0;
8956 for (j
= 0; j
< ncopies
; j
++)
8961 if (costing_p
&& vls_type
== VLS_STORE_INVARIANT
)
8962 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8963 stmt_info
, 0, vect_prologue
);
8964 else if (!costing_p
)
8966 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8967 DR_CHAIN is of size 1. */
8968 gcc_assert (group_size
== 1);
8970 vect_get_slp_defs (op_node
, gvec_oprnds
[0]);
8972 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
,
8973 ncopies
, op
, gvec_oprnds
[0]);
8977 vect_get_slp_defs (mask_node
, &vec_masks
);
8979 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
8985 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8986 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8988 &dataref_ptr
, &vec_offsets
);
8991 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8992 aggr_type
, NULL
, offset
,
8993 &dummy
, gsi
, &ptr_incr
, false,
8997 else if (!costing_p
)
8999 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9000 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9001 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9002 gsi
, stmt_info
, bump
);
9006 for (i
= 0; i
< vec_num
; ++i
)
9010 vec_oprnd
= (*gvec_oprnds
[0])[vec_num
* j
+ i
];
9012 vec_mask
= vec_masks
[vec_num
* j
+ i
];
9013 /* We should have catched mismatched types earlier. */
9014 gcc_assert (useless_type_conversion_p (vectype
,
9015 TREE_TYPE (vec_oprnd
)));
9017 unsigned HOST_WIDE_INT align
;
9018 tree final_mask
= NULL_TREE
;
9019 tree final_len
= NULL_TREE
;
9020 tree bias
= NULL_TREE
;
9024 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
,
9025 loop_masks
, ncopies
,
9028 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9029 final_mask
, vec_mask
, gsi
);
9032 if (gs_info
.ifn
!= IFN_LAST
)
9036 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9038 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9039 stmt_info
, 0, vect_body
);
9043 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9044 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9045 tree scale
= size_int (gs_info
.scale
);
9047 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
9050 final_len
= vect_get_loop_len (loop_vinfo
, gsi
,
9054 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9056 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9057 bias
= build_int_cst (intQI_type_node
, biasval
);
9060 mask_vectype
= truth_type_for (vectype
);
9061 final_mask
= build_minus_one_cst (mask_vectype
);
9066 if (final_len
&& final_mask
)
9067 call
= gimple_build_call_internal
9068 (IFN_MASK_LEN_SCATTER_STORE
, 7, dataref_ptr
,
9069 vec_offset
, scale
, vec_oprnd
, final_mask
,
9071 else if (final_mask
)
9072 call
= gimple_build_call_internal
9073 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
,
9074 vec_offset
, scale
, vec_oprnd
, final_mask
);
9076 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9077 dataref_ptr
, vec_offset
,
9079 gimple_call_set_nothrow (call
, true);
9080 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9083 else if (gs_info
.decl
)
9085 /* The builtin decls path for scatter is legacy, x86 only. */
9086 gcc_assert (nunits
.is_constant ()
9088 || SCALAR_INT_MODE_P
9089 (TYPE_MODE (TREE_TYPE (final_mask
)))));
9092 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9094 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9095 stmt_info
, 0, vect_body
);
9098 poly_uint64 offset_nunits
9099 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
9100 if (known_eq (nunits
, offset_nunits
))
9102 new_stmt
= vect_build_one_scatter_store_call
9103 (vinfo
, stmt_info
, gsi
, &gs_info
,
9104 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
9105 vec_oprnd
, final_mask
);
9106 vect_finish_stmt_generation (vinfo
, stmt_info
,
9109 else if (known_eq (nunits
, offset_nunits
* 2))
9111 /* We have a offset vector with half the number of
9112 lanes but the builtins will store full vectype
9113 data from the lower lanes. */
9114 new_stmt
= vect_build_one_scatter_store_call
9115 (vinfo
, stmt_info
, gsi
, &gs_info
,
9117 vec_offsets
[2 * vec_num
* j
+ 2 * i
],
9118 vec_oprnd
, final_mask
);
9119 vect_finish_stmt_generation (vinfo
, stmt_info
,
9121 int count
= nunits
.to_constant ();
9122 vec_perm_builder
sel (count
, count
, 1);
9123 sel
.quick_grow (count
);
9124 for (int i
= 0; i
< count
; ++i
)
9125 sel
[i
] = i
| (count
/ 2);
9126 vec_perm_indices
indices (sel
, 2, count
);
9128 = vect_gen_perm_mask_checked (vectype
, indices
);
9129 new_stmt
= gimple_build_assign (NULL_TREE
, VEC_PERM_EXPR
,
9130 vec_oprnd
, vec_oprnd
,
9132 vec_oprnd
= make_ssa_name (vectype
);
9133 gimple_set_lhs (new_stmt
, vec_oprnd
);
9134 vect_finish_stmt_generation (vinfo
, stmt_info
,
9138 new_stmt
= gimple_build_assign (NULL_TREE
,
9141 final_mask
= make_ssa_name
9142 (truth_type_for (gs_info
.offset_vectype
));
9143 gimple_set_lhs (new_stmt
, final_mask
);
9144 vect_finish_stmt_generation (vinfo
, stmt_info
,
9147 new_stmt
= vect_build_one_scatter_store_call
9148 (vinfo
, stmt_info
, gsi
, &gs_info
,
9150 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
9151 vec_oprnd
, final_mask
);
9152 vect_finish_stmt_generation (vinfo
, stmt_info
,
9155 else if (known_eq (nunits
* 2, offset_nunits
))
9157 /* We have a offset vector with double the number of
9158 lanes. Select the low/high part accordingly. */
9159 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
9160 if ((vec_num
* j
+ i
) & 1)
9162 int count
= offset_nunits
.to_constant ();
9163 vec_perm_builder
sel (count
, count
, 1);
9164 sel
.quick_grow (count
);
9165 for (int i
= 0; i
< count
; ++i
)
9166 sel
[i
] = i
| (count
/ 2);
9167 vec_perm_indices
indices (sel
, 2, count
);
9168 tree perm_mask
= vect_gen_perm_mask_checked
9169 (TREE_TYPE (vec_offset
), indices
);
9170 new_stmt
= gimple_build_assign (NULL_TREE
,
9175 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
9176 gimple_set_lhs (new_stmt
, vec_offset
);
9177 vect_finish_stmt_generation (vinfo
, stmt_info
,
9180 new_stmt
= vect_build_one_scatter_store_call
9181 (vinfo
, stmt_info
, gsi
, &gs_info
,
9182 dataref_ptr
, vec_offset
,
9183 vec_oprnd
, final_mask
);
9184 vect_finish_stmt_generation (vinfo
, stmt_info
,
9192 /* Emulated scatter. */
9193 gcc_assert (!final_mask
);
9196 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9197 /* For emulated scatter N offset vector element extracts
9198 (we assume the scalar scaling and ptr + offset add is
9199 consumed by the load). */
9201 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9202 stmt_info
, 0, vect_body
);
9203 /* N scalar stores plus extracting the elements. */
9205 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9206 stmt_info
, 0, vect_body
);
9208 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9209 stmt_info
, 0, vect_body
);
9213 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9214 unsigned HOST_WIDE_INT const_offset_nunits
9215 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9216 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9217 vec_alloc (ctor_elts
, const_nunits
);
9218 gimple_seq stmts
= NULL
;
9219 tree elt_type
= TREE_TYPE (vectype
);
9220 unsigned HOST_WIDE_INT elt_size
9221 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9222 /* We support offset vectors with more elements
9223 than the data vector for now. */
9224 unsigned HOST_WIDE_INT factor
9225 = const_offset_nunits
/ const_nunits
;
9226 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
9228 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
9229 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9230 tree scale
= size_int (gs_info
.scale
);
9231 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9232 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9233 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9235 /* Compute the offsetted pointer. */
9236 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9237 bitsize_int (k
+ elt_offset
));
9239 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
9240 vec_offset
, TYPE_SIZE (idx_type
), boff
);
9241 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9242 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
,
9245 = gimple_build (&stmts
, PLUS_EXPR
,
9246 TREE_TYPE (dataref_ptr
),
9248 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9249 /* Extract the element to be stored. */
9251 = gimple_build (&stmts
, BIT_FIELD_REF
,
9252 TREE_TYPE (vectype
),
9253 vec_oprnd
, TYPE_SIZE (elt_type
),
9254 bitsize_int (k
* elt_size
));
9255 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9258 = build2 (MEM_REF
, ltype
, ptr
,
9259 build_int_cst (ref_type
, 0));
9260 new_stmt
= gimple_build_assign (ref
, elt
);
9261 vect_finish_stmt_generation (vinfo
, stmt_info
,
9265 slp_node
->push_vec_def (new_stmt
);
9268 if (!slp
&& !costing_p
)
9269 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9272 if (!slp
&& !costing_p
)
9273 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9275 if (costing_p
&& dump_enabled_p ())
9276 dump_printf_loc (MSG_NOTE
, vect_location
,
9277 "vect_model_store_cost: inside_cost = %d, "
9278 "prologue_cost = %d .\n",
9279 inside_cost
, prologue_cost
);
9284 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
9285 || memory_access_type
== VMAT_CONTIGUOUS_DOWN
9286 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
9287 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
);
9289 unsigned inside_cost
= 0, prologue_cost
= 0;
9290 /* For costing some adjacent vector stores, we'd like to cost with
9291 the total number of them once instead of cost each one by one. */
9292 unsigned int n_adjacent_stores
= 0;
9293 auto_vec
<tree
> result_chain (group_size
);
9294 auto_vec
<tree
, 1> vec_oprnds
;
9295 for (j
= 0; j
< ncopies
; j
++)
9300 if (slp
&& !costing_p
)
9302 /* Get vectorized arguments for SLP_NODE. */
9303 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9304 &vec_oprnds
, mask
, &vec_masks
);
9305 vec_oprnd
= vec_oprnds
[0];
9307 vec_mask
= vec_masks
[0];
9311 /* For interleaved stores we collect vectorized defs for all the
9312 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9313 input to vect_permute_store_chain().
9315 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9317 stmt_vec_info next_stmt_info
= first_stmt_info
;
9318 for (i
= 0; i
< group_size
; i
++)
9320 /* Since gaps are not supported for interleaved stores,
9321 DR_GROUP_SIZE is the exact number of stmts in the chain.
9322 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9323 that there is no interleaving, DR_GROUP_SIZE is 1,
9324 and only one iteration of the loop will be executed. */
9325 op
= vect_get_store_rhs (next_stmt_info
);
9327 update_prologue_cost (&prologue_cost
, op
);
9330 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
9333 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9334 dr_chain
.quick_push (vec_oprnd
);
9336 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9338 if (mask
&& !costing_p
)
9340 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9343 vec_mask
= vec_masks
[0];
9347 /* We should have catched mismatched types earlier. */
9348 gcc_assert (costing_p
9349 || useless_type_conversion_p (vectype
,
9350 TREE_TYPE (vec_oprnd
)));
9351 bool simd_lane_access_p
9352 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9354 && simd_lane_access_p
9356 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9357 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9358 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9359 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9360 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9361 get_alias_set (TREE_TYPE (ref_type
))))
9363 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9364 dataref_offset
= build_int_cst (ref_type
, 0);
9366 else if (!costing_p
)
9368 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9369 simd_lane_access_p
? loop
: NULL
,
9370 offset
, &dummy
, gsi
, &ptr_incr
,
9371 simd_lane_access_p
, bump
);
9373 else if (!costing_p
)
9375 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9376 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9377 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9379 for (i
= 0; i
< group_size
; i
++)
9381 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9382 dr_chain
[i
] = vec_oprnd
;
9385 vec_mask
= vec_masks
[j
];
9387 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9389 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9397 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
9400 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
9401 int nstmts
= ceil_log2 (group_size
) * group_size
;
9402 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
9403 stmt_info
, 0, vect_body
);
9404 if (dump_enabled_p ())
9405 dump_printf_loc (MSG_NOTE
, vect_location
,
9406 "vect_model_store_cost: "
9407 "strided group_size = %d .\n",
9411 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
9412 gsi
, &result_chain
);
9415 stmt_vec_info next_stmt_info
= first_stmt_info
;
9416 for (i
= 0; i
< vec_num
; i
++)
9421 vec_oprnd
= vec_oprnds
[i
];
9422 else if (grouped_store
)
9423 /* For grouped stores vectorized defs are interleaved in
9424 vect_permute_store_chain(). */
9425 vec_oprnd
= result_chain
[i
];
9428 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9431 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_perm
,
9432 stmt_info
, 0, vect_body
);
9435 tree perm_mask
= perm_mask_for_reverse (vectype
);
9436 tree perm_dest
= vect_create_destination_var (
9437 vect_get_store_rhs (stmt_info
), vectype
);
9438 tree new_temp
= make_ssa_name (perm_dest
);
9440 /* Generate the permute statement. */
9442 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9443 vec_oprnd
, perm_mask
);
9444 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
9447 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9448 vec_oprnd
= new_temp
;
9454 n_adjacent_stores
++;
9458 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9459 if (!next_stmt_info
)
9466 tree final_mask
= NULL_TREE
;
9467 tree final_len
= NULL_TREE
;
9468 tree bias
= NULL_TREE
;
9470 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9471 vec_num
* ncopies
, vectype
,
9473 if (slp
&& vec_mask
)
9474 vec_mask
= vec_masks
[i
];
9476 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9480 /* Bump the vector pointer. */
9481 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9485 unsigned HOST_WIDE_INT align
;
9486 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9487 if (alignment_support_scheme
== dr_aligned
)
9489 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9491 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9495 misalign
= misalignment
;
9496 if (dataref_offset
== NULL_TREE
9497 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9498 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9500 align
= least_bit_hwi (misalign
| align
);
9502 /* Compute IFN when LOOP_LENS or final_mask valid. */
9503 machine_mode vmode
= TYPE_MODE (vectype
);
9504 machine_mode new_vmode
= vmode
;
9505 internal_fn partial_ifn
= IFN_LAST
;
9508 opt_machine_mode new_ovmode
9509 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9510 new_vmode
= new_ovmode
.require ();
9512 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9513 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9514 vec_num
* ncopies
, vectype
,
9515 vec_num
* j
+ i
, factor
);
9517 else if (final_mask
)
9519 if (!can_vec_mask_load_store_p (
9520 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9525 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9529 /* Pass VF value to 'len' argument of
9530 MASK_LEN_STORE if LOOP_LENS is invalid. */
9531 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9535 /* Pass all ones value to 'mask' argument of
9536 MASK_LEN_STORE if final_mask is invalid. */
9537 mask_vectype
= truth_type_for (vectype
);
9538 final_mask
= build_minus_one_cst (mask_vectype
);
9544 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9546 bias
= build_int_cst (intQI_type_node
, biasval
);
9549 /* Arguments are ready. Create the new vector stmt. */
9553 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9554 /* Need conversion if it's wrapped with VnQI. */
9555 if (vmode
!= new_vmode
)
9558 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9560 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9561 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9563 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9564 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9568 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9569 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9570 dataref_ptr
, ptr
, final_mask
,
9571 final_len
, bias
, vec_oprnd
);
9573 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9574 dataref_ptr
, ptr
, final_len
,
9576 gimple_call_set_nothrow (call
, true);
9577 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9580 else if (final_mask
)
9582 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9584 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9585 ptr
, final_mask
, vec_oprnd
);
9586 gimple_call_set_nothrow (call
, true);
9587 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9593 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9594 dataref_offset
? dataref_offset
9595 : build_int_cst (ref_type
, 0));
9596 if (alignment_support_scheme
== dr_aligned
)
9599 TREE_TYPE (data_ref
)
9600 = build_aligned_type (TREE_TYPE (data_ref
),
9601 align
* BITS_PER_UNIT
);
9602 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9603 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9604 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9610 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9611 if (!next_stmt_info
)
9614 if (!slp
&& !costing_p
)
9617 *vec_stmt
= new_stmt
;
9618 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9624 if (n_adjacent_stores
> 0)
9625 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9626 alignment_support_scheme
, misalignment
,
9627 &inside_cost
, cost_vec
);
9629 /* When vectorizing a store into the function result assign
9630 a penalty if the function returns in a multi-register location.
9631 In this case we assume we'll end up with having to spill the
9632 vector result and do piecewise loads as a conservative estimate. */
9633 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
9635 && (TREE_CODE (base
) == RESULT_DECL
9636 || (DECL_P (base
) && cfun_returns (base
)))
9637 && !aggregate_value_p (base
, cfun
->decl
))
9639 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
9640 /* ??? Handle PARALLEL in some way. */
9643 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
9644 /* Assume that a single reg-reg move is possible and cheap,
9645 do not account for vector to gp register move cost. */
9650 += record_stmt_cost (cost_vec
, ncopies
, vector_store
,
9651 stmt_info
, 0, vect_epilogue
);
9654 += record_stmt_cost (cost_vec
, ncopies
* nregs
, scalar_load
,
9655 stmt_info
, 0, vect_epilogue
);
9659 if (dump_enabled_p ())
9660 dump_printf_loc (MSG_NOTE
, vect_location
,
9661 "vect_model_store_cost: inside_cost = %d, "
9662 "prologue_cost = %d .\n",
9663 inside_cost
, prologue_cost
);
9669 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9670 VECTOR_CST mask. No checks are made that the target platform supports the
9671 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9672 vect_gen_perm_mask_checked. */
9675 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
/* SEL must supply exactly one index per lane of VECTYPE; a mismatch
   would produce a malformed VEC_PERM_EXPR mask.  */
9679 poly_uint64 nunits
= sel
.length ();
9680 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
/* Permutation masks are integer vectors with ssizetype elements,
   independent of VECTYPE's own element type.  */
9682 mask_type
= build_vector_type (ssizetype
, nunits
);
9683 return vec_perm_indices_to_tree (mask_type
, sel
);
9686 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9687 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9690 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
9692 machine_mode vmode
= TYPE_MODE (vectype
);
/* Query the target with the same mode for input and output: the
   permutation takes vectors of VECTYPE and yields VECTYPE.  */
9693 gcc_assert (can_vec_perm_const_p (vmode
, vmode
, sel
));
9694 return vect_gen_perm_mask_any (vectype
, sel
);
9697 /* Given a vector variable X and Y, that was generated for the scalar
9698 STMT_INFO, generate instructions to permute the vector elements of X and Y
9699 using permutation mask MASK_VEC, insert them at *GSI and return the
9700 permuted vector variable. */
9703 permute_vec_elements (vec_info
*vinfo
,
9704 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
9705 gimple_stmt_iterator
*gsi
)
/* The result vector has the same type as the first input vector.  */
9707 tree vectype
= TREE_TYPE (x
);
9708 tree perm_dest
, data_ref
;
/* If the scalar statement has an SSA-name lhs, derive the destination
   variable's name from it (keeps vectorizer dumps readable); otherwise
   create a fresh anonymous vector variable.  */
9711 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
9712 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
9713 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9715 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
9716 data_ref
= make_ssa_name (perm_dest
);
9718 /* Generate the permute statement. */
9719 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
/* Insert the permute at *GSI as part of the vectorized code for
   STMT_INFO.  */
9720 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
9725 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9726 inserting them on the loops preheader edge. Returns true if we
9727 were successful in doing so (and thus STMT_INFO can be moved then),
9728 otherwise returns false. HOIST_P indicates if we want to hoist the
9729 definitions of all SSA uses, it would be false when we are costing. */
9732 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
, bool hoist_p
)
/* First pass: verify that every in-loop definition feeding STMT_INFO
   can legally be hoisted, without moving anything yet.  */
9738 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9740 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only definitions inside LOOP need hoisting; default defs and
   statements outside the loop are already fine where they are.  */
9741 if (!gimple_nop_p (def_stmt
)
9742 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9744 /* Make sure we don't need to recurse. While we could do
9745 so in simple cases when there are more complex use webs
9746 we don't have an easy way to preserve stmt order to fulfil
9747 dependencies within them. */
/* A PHI definition cannot be moved onto the preheader edge at all.  */
9750 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Likewise give up if DEF_STMT itself depends on another in-loop
   definition: hoisting it would require recursing.  */
9752 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
9754 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
9755 if (!gimple_nop_p (def_stmt2
)
9756 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second pass: actually move each in-loop definition onto the loop
   preheader edge.  NOTE(review): per the header comment this pass is
   presumably skipped when !HOIST_P (costing); the guard itself is not
   among the visible lines — confirm against the full source.  */
9769 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9771 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
9772 if (!gimple_nop_p (def_stmt
)
9773 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* Detach the definition from its current position and re-insert it
   on the preheader edge, committing the edge insertion immediately.  */
9775 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
9776 gsi_remove (&gsi
, false);
9777 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
9784 /* vectorizable_load.
9786 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9787 that can be vectorized.
9788 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9789 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9790 Return true if STMT_INFO is vectorizable in this way. */
9793 vectorizable_load (vec_info
*vinfo
,
9794 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9795 gimple
**vec_stmt
, slp_tree slp_node
,
9796 stmt_vector_for_cost
*cost_vec
)
9799 tree vec_dest
= NULL
;
9800 tree data_ref
= NULL
;
9801 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9802 class loop
*loop
= NULL
;
9803 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9804 bool nested_in_vect_loop
= false;
9806 /* Avoid false positive uninitialized warning, see PR110652. */
9807 tree new_temp
= NULL_TREE
;
9810 tree dataref_ptr
= NULL_TREE
;
9811 tree dataref_offset
= NULL_TREE
;
9812 gimple
*ptr_incr
= NULL
;
9815 unsigned int group_size
;
9816 poly_uint64 group_gap_adj
;
9817 tree msq
= NULL_TREE
, lsq
;
9818 tree realignment_token
= NULL_TREE
;
9820 vec
<tree
> dr_chain
= vNULL
;
9821 bool grouped_load
= false;
9822 stmt_vec_info first_stmt_info
;
9823 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9824 bool compute_in_loop
= false;
9825 class loop
*at_loop
;
9827 bool slp
= (slp_node
!= NULL
);
9828 bool slp_perm
= false;
9829 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9832 gather_scatter_info gs_info
;
9834 enum vect_def_type mask_dt
= vect_unknown_def_type
;
9836 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9839 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9843 if (!STMT_VINFO_DATA_REF (stmt_info
))
9846 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
9847 int mask_index
= -1;
9848 slp_tree slp_op
= NULL
;
9849 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
9851 scalar_dest
= gimple_assign_lhs (assign
);
9852 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
9855 tree_code code
= gimple_assign_rhs_code (assign
);
9856 if (code
!= ARRAY_REF
9857 && code
!= BIT_FIELD_REF
9858 && code
!= INDIRECT_REF
9859 && code
!= COMPONENT_REF
9860 && code
!= IMAGPART_EXPR
9861 && code
!= REALPART_EXPR
9863 && TREE_CODE_CLASS (code
) != tcc_declaration
)
9868 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9869 if (!call
|| !gimple_call_internal_p (call
))
9872 internal_fn ifn
= gimple_call_internal_fn (call
);
9873 if (!internal_load_fn_p (ifn
))
9876 scalar_dest
= gimple_call_lhs (call
);
9880 mask_index
= internal_fn_mask_index (ifn
);
9881 if (mask_index
>= 0 && slp_node
)
9882 mask_index
= vect_slp_child_index_for_operand
9883 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9885 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
9886 &mask
, &slp_op
, &mask_dt
, &mask_vectype
))
9890 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9891 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9895 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
9896 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
9897 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
9902 /* Multiple types in SLP are handled by creating the appropriate number of
9903 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9908 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9910 gcc_assert (ncopies
>= 1);
9912 /* FORNOW. This restriction should be relaxed. */
9913 if (nested_in_vect_loop
9914 && (ncopies
> 1 || (slp
&& SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) > 1)))
9916 if (dump_enabled_p ())
9917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9918 "multiple types in nested loop.\n");
9922 /* Invalidate assumptions made by dependence analysis when vectorization
9923 on the unrolled body effectively re-orders stmts. */
9925 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9926 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9927 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9929 if (dump_enabled_p ())
9930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9931 "cannot perform implicit CSE when unrolling "
9932 "with negative dependence distance\n");
9936 elem_type
= TREE_TYPE (vectype
);
9937 mode
= TYPE_MODE (vectype
);
9939 /* FORNOW. In some cases can vectorize even if data-type not supported
9940 (e.g. - data copies). */
9941 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9943 if (dump_enabled_p ())
9944 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9945 "Aligned load, but unsupported type.\n");
9949 /* Check if the load is a part of an interleaving chain. */
9950 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9952 grouped_load
= true;
9954 gcc_assert (!nested_in_vect_loop
);
9955 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9957 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9958 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9960 /* Refuse non-SLP vectorization of SLP-only groups. */
9961 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9963 if (dump_enabled_p ())
9964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9965 "cannot vectorize load in non-SLP mode.\n");
9969 /* Invalidate assumptions made by dependence analysis when vectorization
9970 on the unrolled body effectively re-orders stmts. */
9971 if (STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9972 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9973 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9975 if (dump_enabled_p ())
9976 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9977 "cannot perform implicit CSE when performing "
9978 "group loads with negative dependence distance\n");
9985 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9991 /* In BB vectorization we may not actually use a loaded vector
9992 accessing elements in excess of DR_GROUP_SIZE. */
9993 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9994 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
9995 unsigned HOST_WIDE_INT nunits
;
9996 unsigned j
, k
, maxk
= 0;
9997 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
10000 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
10001 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
10002 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
10004 if (dump_enabled_p ())
10005 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10006 "BB vectorization with gaps at the end of "
10007 "a load is not supported\n");
10012 auto_vec
<tree
> tem
;
10014 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
10017 if (dump_enabled_p ())
10018 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
10020 "unsupported load permutation\n");
10025 vect_memory_access_type memory_access_type
;
10026 enum dr_alignment_support alignment_support_scheme
;
10028 poly_int64 poffset
;
10029 internal_fn lanes_ifn
;
10030 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
10031 ncopies
, &memory_access_type
, &poffset
,
10032 &alignment_support_scheme
, &misalignment
, &gs_info
,
10038 if (memory_access_type
== VMAT_CONTIGUOUS
)
10040 machine_mode vec_mode
= TYPE_MODE (vectype
);
10041 if (!VECTOR_MODE_P (vec_mode
)
10042 || !can_vec_mask_load_store_p (vec_mode
,
10043 TYPE_MODE (mask_vectype
), true))
10046 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
10047 && memory_access_type
!= VMAT_GATHER_SCATTER
)
10049 if (dump_enabled_p ())
10050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10051 "unsupported access type for masked load.\n");
10054 else if (memory_access_type
== VMAT_GATHER_SCATTER
10055 && gs_info
.ifn
== IFN_LAST
10058 if (dump_enabled_p ())
10059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10060 "unsupported masked emulated gather.\n");
10063 else if (memory_access_type
== VMAT_ELEMENTWISE
10064 || memory_access_type
== VMAT_STRIDED_SLP
)
10066 if (dump_enabled_p ())
10067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10068 "unsupported masked strided access.\n");
10073 bool costing_p
= !vec_stmt
;
10075 if (costing_p
) /* transformation not required. */
10079 && !vect_maybe_update_slp_op_vectype (slp_op
,
10082 if (dump_enabled_p ())
10083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10084 "incompatible vector types for invariants\n");
10089 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
10092 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10093 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
10094 VLS_LOAD
, group_size
,
10095 memory_access_type
, &gs_info
,
10098 if (dump_enabled_p ()
10099 && memory_access_type
!= VMAT_ELEMENTWISE
10100 && memory_access_type
!= VMAT_GATHER_SCATTER
10101 && alignment_support_scheme
!= dr_aligned
)
10102 dump_printf_loc (MSG_NOTE
, vect_location
,
10103 "Vectorizing an unaligned access.\n");
10105 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10106 vinfo
->any_known_not_updated_vssa
= true;
10108 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
10112 gcc_assert (memory_access_type
10113 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
10115 if (dump_enabled_p () && !costing_p
)
10116 dump_printf_loc (MSG_NOTE
, vect_location
,
10117 "transform load. ncopies = %d\n", ncopies
);
10121 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
10122 ensure_base_align (dr_info
);
10124 if (memory_access_type
== VMAT_INVARIANT
)
10126 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
10127 /* If we have versioned for aliasing or the loop doesn't
10128 have any data dependencies that would preclude this,
10129 then we are sure this is a loop invariant load and
10130 thus we can insert it on the preheader edge.
10131 TODO: hoist_defs_of_uses should ideally be computed
10132 once at analysis time, remembered and used in the
10134 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
10135 && !nested_in_vect_loop
10136 && hoist_defs_of_uses (stmt_info
, loop
, !costing_p
));
10139 enum vect_cost_model_location cost_loc
10140 = hoist_p
? vect_prologue
: vect_body
;
10141 unsigned int cost
= record_stmt_cost (cost_vec
, 1, scalar_load
,
10142 stmt_info
, 0, cost_loc
);
10143 cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
, 0,
10145 unsigned int prologue_cost
= hoist_p
? cost
: 0;
10146 unsigned int inside_cost
= hoist_p
? 0 : cost
;
10147 if (dump_enabled_p ())
10148 dump_printf_loc (MSG_NOTE
, vect_location
,
10149 "vect_model_load_cost: inside_cost = %d, "
10150 "prologue_cost = %d .\n",
10151 inside_cost
, prologue_cost
);
10156 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
10157 if (dump_enabled_p ())
10158 dump_printf_loc (MSG_NOTE
, vect_location
,
10159 "hoisting out of the vectorized loop: %G",
10161 scalar_dest
= copy_ssa_name (scalar_dest
);
10162 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
10163 edge pe
= loop_preheader_edge (loop
);
10164 gphi
*vphi
= get_virtual_phi (loop
->header
);
10167 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
10169 vuse
= gimple_vuse (gsi_stmt (*gsi
));
10170 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
10171 gimple_set_vuse (new_stmt
, vuse
);
10172 gsi_insert_on_edge_immediate (pe
, new_stmt
);
10174 /* These copies are all equivalent. */
10176 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10180 gimple_stmt_iterator gsi2
= *gsi
;
10182 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10185 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10187 for (j
= 0; j
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
); ++j
)
10188 slp_node
->push_vec_def (new_stmt
);
10191 for (j
= 0; j
< ncopies
; ++j
)
10192 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10193 *vec_stmt
= new_stmt
;
10198 if (memory_access_type
== VMAT_ELEMENTWISE
10199 || memory_access_type
== VMAT_STRIDED_SLP
)
10201 gimple_stmt_iterator incr_gsi
;
10206 vec
<constructor_elt
, va_gc
> *v
= NULL
;
10207 tree stride_base
, stride_step
, alias_off
;
10208 /* Checked by get_load_store_type. */
10209 unsigned int const_nunits
= nunits
.to_constant ();
10210 unsigned HOST_WIDE_INT cst_offset
= 0;
10212 unsigned int inside_cost
= 0;
10214 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
10215 gcc_assert (!nested_in_vect_loop
);
10219 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10220 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10224 first_stmt_info
= stmt_info
;
10225 first_dr_info
= dr_info
;
10228 if (slp
&& grouped_load
)
10230 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10231 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10237 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
10238 * vect_get_place_in_interleaving_chain (stmt_info
,
10241 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
10246 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
10247 stride_base
= fold_build_pointer_plus (
10248 DR_BASE_ADDRESS (first_dr_info
->dr
),
10249 size_binop (PLUS_EXPR
, convert_to_ptrofftype (dr_offset
),
10250 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
10251 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
10253 /* For a load with loop-invariant (but other than power-of-2)
10254 stride (i.e. not a grouped access) like so:
10256 for (i = 0; i < n; i += stride)
10259 we generate a new induction variable and new accesses to
10260 form a new vector (or vectors, depending on ncopies):
10262 for (j = 0; ; j += VF*stride)
10264 tmp2 = array[j + stride];
10266 vectemp = {tmp1, tmp2, ...}
10269 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
10270 build_int_cst (TREE_TYPE (stride_step
), vf
));
10272 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
10274 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
10275 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
10276 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
10277 loop
, &incr_gsi
, insert_after
,
10280 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
10283 running_off
= offvar
;
10284 alias_off
= build_int_cst (ref_type
, 0);
10285 int nloads
= const_nunits
;
10287 tree ltype
= TREE_TYPE (vectype
);
10288 tree lvectype
= vectype
;
10289 auto_vec
<tree
> dr_chain
;
10290 if (memory_access_type
== VMAT_STRIDED_SLP
)
10292 HOST_WIDE_INT n
= gcd (group_size
, const_nunits
);
10293 /* Use the target vector type if the group size is a multiple
10295 if (n
== const_nunits
)
10298 lnel
= const_nunits
;
10301 /* Else use the biggest vector we can load the group without
10302 accessing excess elements. */
10307 = vector_vector_composition_type (vectype
, const_nunits
/ n
,
10309 if (vtype
!= NULL_TREE
)
10311 nloads
= const_nunits
/ n
;
10317 /* Else fall back to the default element-wise access. */
10318 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10320 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10321 else if (nloads
== 1)
10326 /* For SLP permutation support we need to load the whole group,
10327 not only the number of vector stmts the permutation result
10331 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10333 unsigned int const_vf
= vf
.to_constant ();
10334 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10335 dr_chain
.create (ncopies
);
10338 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10340 unsigned int group_el
= 0;
10341 unsigned HOST_WIDE_INT
10342 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10343 unsigned int n_groups
= 0;
10344 /* For costing some adjacent vector loads, we'd like to cost with
10345 the total number of them once instead of cost each one by one. */
10346 unsigned int n_adjacent_loads
= 0;
10347 for (j
= 0; j
< ncopies
; j
++)
10349 if (nloads
> 1 && !costing_p
)
10350 vec_alloc (v
, nloads
);
10351 gimple
*new_stmt
= NULL
;
10352 for (i
= 0; i
< nloads
; i
++)
10356 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10357 avoid ICE, see PR110776. */
10358 if (VECTOR_TYPE_P (ltype
)
10359 && memory_access_type
!= VMAT_ELEMENTWISE
)
10360 n_adjacent_loads
++;
10362 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10363 stmt_info
, 0, vect_body
);
10366 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10367 group_el
* elsz
+ cst_offset
);
10368 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10369 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10370 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10371 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10373 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10374 gimple_assign_lhs (new_stmt
));
10378 || group_el
== group_size
)
10381 /* When doing SLP make sure to not load elements from
10382 the next vector iteration, those will not be accessed
10383 so just use the last element again. See PR107451. */
10384 if (!slp
|| known_lt (n_groups
, vf
))
10386 tree newoff
= copy_ssa_name (running_off
);
10388 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10389 running_off
, stride_step
);
10390 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10391 running_off
= newoff
;
10400 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10401 stmt_info
, 0, vect_body
);
10404 tree vec_inv
= build_constructor (lvectype
, v
);
10405 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10407 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10408 if (lvectype
!= vectype
)
10411 = gimple_build_assign (make_ssa_name (vectype
),
10413 build1 (VIEW_CONVERT_EXPR
,
10414 vectype
, new_temp
));
10415 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10426 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10428 slp_node
->push_vec_def (new_stmt
);
10433 *vec_stmt
= new_stmt
;
10434 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10444 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10445 true, &n_perms
, &n_loads
);
10446 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10447 first_stmt_info
, 0, vect_body
);
10450 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10456 if (n_adjacent_loads
> 0)
10457 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10458 alignment_support_scheme
, misalignment
, false,
10459 &inside_cost
, nullptr, cost_vec
, cost_vec
,
10461 if (dump_enabled_p ())
10462 dump_printf_loc (MSG_NOTE
, vect_location
,
10463 "vect_model_load_cost: inside_cost = %u, "
10464 "prologue_cost = 0 .\n",
10471 if (memory_access_type
== VMAT_GATHER_SCATTER
10472 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10473 grouped_load
= false;
10476 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10480 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10481 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10485 first_stmt_info
= stmt_info
;
10488 /* For SLP vectorization we directly vectorize a subchain
10489 without permutation. */
10490 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10491 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10492 /* For BB vectorization always use the first stmt to base
10493 the data ref pointer on. */
10495 first_stmt_info_for_drptr
10496 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10498 /* Check if the chain of loads is already vectorized. */
10499 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
10500 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10501 ??? But we can only do so if there is exactly one
10502 as we have no way to get at the rest. Leave the CSE
10504 ??? With the group load eventually participating
10505 in multiple different permutations (having multiple
10506 slp nodes which refer to the same group) the CSE
10507 is even wrong code. See PR56270. */
10510 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10513 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10516 /* VEC_NUM is the number of vect stmts to be created for this group. */
10519 grouped_load
= false;
10520 /* If an SLP permutation is from N elements to N elements,
10521 and if one vector holds a whole number of N, we can load
10522 the inputs to the permutation in the same way as an
10523 unpermuted sequence. In other cases we need to load the
10524 whole group, not only the number of vector stmts the
10525 permutation result fits in. */
10526 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10527 if (nested_in_vect_loop
)
10528 /* We do not support grouped accesses in a nested loop,
10529 instead the access is contiguous but it might be
10530 permuted. No gap adjustment is needed though. */
10531 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10533 && (group_size
!= scalar_lanes
10534 || !multiple_p (nunits
, group_size
)))
10536 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10537 variable VF; see vect_transform_slp_perm_load. */
10538 unsigned int const_vf
= vf
.to_constant ();
10539 unsigned int const_nunits
= nunits
.to_constant ();
10540 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10541 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10545 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10547 = group_size
- scalar_lanes
;
10551 vec_num
= group_size
;
10553 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10557 first_stmt_info
= stmt_info
;
10558 first_dr_info
= dr_info
;
10559 group_size
= vec_num
= 1;
10561 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10563 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10566 gcc_assert (alignment_support_scheme
);
10567 vec_loop_masks
*loop_masks
10568 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10569 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10571 vec_loop_lens
*loop_lens
10572 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10573 ? &LOOP_VINFO_LENS (loop_vinfo
)
10576 /* The vect_transform_stmt and vect_analyze_stmt will go here but there
10577 are some difference here. We cannot enable both the lens and masks
10578 during transform but it is allowed during analysis.
10579 Shouldn't go with length-based approach if fully masked. */
10580 if (cost_vec
== NULL
)
10581 /* The cost_vec is NULL during transfrom. */
10582 gcc_assert ((!loop_lens
|| !loop_masks
));
10584 /* Targets with store-lane instructions must not require explicit
10585 realignment. vect_supportable_dr_alignment always returns either
10586 dr_aligned or dr_unaligned_supported for masked operations. */
10587 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
10590 || alignment_support_scheme
== dr_aligned
10591 || alignment_support_scheme
== dr_unaligned_supported
);
10593 /* In case the vectorization factor (VF) is bigger than the number
10594 of elements that we can fit in a vectype (nunits), we have to generate
10595 more than one vector stmt - i.e - we need to "unroll" the
10596 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10597 from one copy of the vector stmt to the next, in the field
10598 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10599 stages to find the correct vector defs to be used when vectorizing
10600 stmts that use the defs of the current stmt. The example below
10601 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10602 need to create 4 vectorized stmts):
10604 before vectorization:
10605 RELATED_STMT VEC_STMT
10609 step 1: vectorize stmt S1:
10610 We first create the vector stmt VS1_0, and, as usual, record a
10611 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10612 Next, we create the vector stmt VS1_1, and record a pointer to
10613 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10614 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10615 stmts and pointers:
10616 RELATED_STMT VEC_STMT
10617 VS1_0: vx0 = memref0 VS1_1 -
10618 VS1_1: vx1 = memref1 VS1_2 -
10619 VS1_2: vx2 = memref2 VS1_3 -
10620 VS1_3: vx3 = memref3 - -
10621 S1: x = load - VS1_0
10625 /* In case of interleaving (non-unit grouped access):
10632 Vectorized loads are created in the order of memory accesses
10633 starting from the access of the first stmt of the chain:
10636 VS2: vx1 = &base + vec_size*1
10637 VS3: vx3 = &base + vec_size*2
10638 VS4: vx4 = &base + vec_size*3
10640 Then permutation statements are generated:
10642 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10643 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10646 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10647 (the order of the data-refs in the output of vect_permute_load_chain
10648 corresponds to the order of scalar stmts in the interleaving chain - see
10649 the documentation of vect_permute_load_chain()).
10650 The generation of permutation stmts and recording them in
10651 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10653 In case of both multiple types and interleaving, the vector loads and
10654 permutation stmts above are created for every copy. The result vector
10655 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10656 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10658 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10659 on a target that supports unaligned accesses (dr_unaligned_supported)
10660 we generate the following code:
10664 p = p + indx * vectype_size;
10669 Otherwise, the data reference is potentially unaligned on a target that
10670 does not support unaligned accesses (dr_explicit_realign_optimized) -
10671 then generate the following code, in which the data in each iteration is
10672 obtained by two vector loads, one from the previous iteration, and one
10673 from the current iteration:
10675 msq_init = *(floor(p1))
10676 p2 = initial_addr + VS - 1;
10677 realignment_token = call target_builtin;
10680 p2 = p2 + indx * vectype_size
10682 vec_dest = realign_load (msq, lsq, realignment_token)
10687 /* If the misalignment remains the same throughout the execution of the
10688 loop, we can create the init_addr and permutation mask at the loop
10689 preheader. Otherwise, it needs to be created inside the loop.
10690 This can only occur when vectorizing memory accesses in the inner-loop
10691 nested within an outer-loop that is being vectorized. */
10693 if (nested_in_vect_loop
10694 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10695 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10697 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10698 compute_in_loop
= true;
10701 bool diff_first_stmt_info
10702 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10704 tree offset
= NULL_TREE
;
10705 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10706 || alignment_support_scheme
== dr_explicit_realign
)
10707 && !compute_in_loop
)
10709 /* If we have different first_stmt_info, we can't set up realignment
10710 here, since we can't guarantee first_stmt_info DR has been
10711 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10712 distance from first_stmt_info DR instead as below. */
10715 if (!diff_first_stmt_info
)
10716 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10717 &realignment_token
,
10718 alignment_support_scheme
, NULL_TREE
,
10720 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10722 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10723 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10725 gcc_assert (!first_stmt_info_for_drptr
);
10732 if (!known_eq (poffset
, 0))
10734 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10735 : size_int (poffset
));
10738 tree vec_offset
= NULL_TREE
;
10739 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10741 aggr_type
= NULL_TREE
;
10744 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10746 aggr_type
= elem_type
;
10748 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10749 &bump
, &vec_offset
, loop_lens
);
10753 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10754 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10756 aggr_type
= vectype
;
10757 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10758 memory_access_type
, loop_lens
);
10761 auto_vec
<tree
> vec_offsets
;
10762 auto_vec
<tree
> vec_masks
;
10763 if (mask
&& !costing_p
)
10766 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10769 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10770 &vec_masks
, mask_vectype
);
10773 tree vec_mask
= NULL_TREE
;
10774 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10776 gcc_assert (alignment_support_scheme
== dr_aligned
10777 || alignment_support_scheme
== dr_unaligned_supported
);
10778 gcc_assert (grouped_load
&& !slp
);
10780 unsigned int inside_cost
= 0, prologue_cost
= 0;
10781 /* For costing some adjacent vector loads, we'd like to cost with
10782 the total number of them once instead of cost each one by one. */
10783 unsigned int n_adjacent_loads
= 0;
10784 for (j
= 0; j
< ncopies
; j
++)
10788 /* An IFN_LOAD_LANES will load all its vector results,
10789 regardless of which ones we actually need. Account
10790 for the cost of unused results. */
10791 if (first_stmt_info
== stmt_info
)
10793 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10794 stmt_vec_info next_stmt_info
= first_stmt_info
;
10798 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10800 while (next_stmt_info
);
10803 if (dump_enabled_p ())
10804 dump_printf_loc (MSG_NOTE
, vect_location
,
10805 "vect_model_load_cost: %d "
10806 "unused vectors.\n",
10808 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10809 alignment_support_scheme
,
10810 misalignment
, false, &inside_cost
,
10811 &prologue_cost
, cost_vec
, cost_vec
,
10815 n_adjacent_loads
++;
10819 /* 1. Create the vector or array pointer update chain. */
10822 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10823 at_loop
, offset
, &dummy
, gsi
,
10824 &ptr_incr
, false, bump
);
10827 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10828 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10832 vec_mask
= vec_masks
[j
];
10834 tree vec_array
= create_vector_array (vectype
, vec_num
);
10836 tree final_mask
= NULL_TREE
;
10837 tree final_len
= NULL_TREE
;
10838 tree bias
= NULL_TREE
;
10840 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10841 ncopies
, vectype
, j
);
10843 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10846 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10849 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10850 ncopies
, vectype
, j
, 1);
10852 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10853 signed char biasval
10854 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10855 bias
= build_int_cst (intQI_type_node
, biasval
);
10858 mask_vectype
= truth_type_for (vectype
);
10859 final_mask
= build_minus_one_cst (mask_vectype
);
10864 if (final_len
&& final_mask
)
10867 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10868 VEC_MASK, LEN, BIAS). */
10869 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10870 tree alias_ptr
= build_int_cst (ref_type
, align
);
10871 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10872 dataref_ptr
, alias_ptr
,
10873 final_mask
, final_len
, bias
);
10875 else if (final_mask
)
10878 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10880 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10881 tree alias_ptr
= build_int_cst (ref_type
, align
);
10882 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10883 dataref_ptr
, alias_ptr
,
10889 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10890 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10891 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10893 gimple_call_set_lhs (call
, vec_array
);
10894 gimple_call_set_nothrow (call
, true);
10895 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10897 dr_chain
.create (vec_num
);
10898 /* Extract each vector into an SSA_NAME. */
10899 for (i
= 0; i
< vec_num
; i
++)
10901 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10903 dr_chain
.quick_push (new_temp
);
10906 /* Record the mapping between SSA_NAMEs and statements. */
10907 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10909 /* Record that VEC_ARRAY is now dead. */
10910 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10912 dr_chain
.release ();
10914 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10919 if (n_adjacent_loads
> 0)
10920 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10921 alignment_support_scheme
, misalignment
, false,
10922 &inside_cost
, &prologue_cost
, cost_vec
,
10924 if (dump_enabled_p ())
10925 dump_printf_loc (MSG_NOTE
, vect_location
,
10926 "vect_model_load_cost: inside_cost = %u, "
10927 "prologue_cost = %u .\n",
10928 inside_cost
, prologue_cost
);
10934 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10936 gcc_assert (alignment_support_scheme
== dr_aligned
10937 || alignment_support_scheme
== dr_unaligned_supported
);
10938 gcc_assert (!grouped_load
&& !slp_perm
);
10940 unsigned int inside_cost
= 0, prologue_cost
= 0;
10941 for (j
= 0; j
< ncopies
; j
++)
10943 /* 1. Create the vector or array pointer update chain. */
10944 if (j
== 0 && !costing_p
)
10946 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10947 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10948 slp_node
, &gs_info
, &dataref_ptr
,
10952 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10953 at_loop
, offset
, &dummy
, gsi
,
10954 &ptr_incr
, false, bump
);
10956 else if (!costing_p
)
10958 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10959 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10960 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10961 gsi
, stmt_info
, bump
);
10964 gimple
*new_stmt
= NULL
;
10965 for (i
= 0; i
< vec_num
; i
++)
10967 tree final_mask
= NULL_TREE
;
10968 tree final_len
= NULL_TREE
;
10969 tree bias
= NULL_TREE
;
10973 vec_mask
= vec_masks
[vec_num
* j
+ i
];
10976 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10977 vec_num
* ncopies
, vectype
,
10980 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10981 final_mask
, vec_mask
, gsi
);
10983 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10984 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10985 gsi
, stmt_info
, bump
);
10988 /* 2. Create the vector-load in the loop. */
10989 unsigned HOST_WIDE_INT align
;
10990 if (gs_info
.ifn
!= IFN_LAST
)
10994 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
10996 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
10997 stmt_info
, 0, vect_body
);
11000 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
11001 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
11002 tree zero
= build_zero_cst (vectype
);
11003 tree scale
= size_int (gs_info
.scale
);
11005 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
11009 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11010 vec_num
* ncopies
, vectype
,
11011 vec_num
* j
+ i
, 1);
11014 = build_int_cst (sizetype
,
11015 TYPE_VECTOR_SUBPARTS (vectype
));
11016 signed char biasval
11017 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11018 bias
= build_int_cst (intQI_type_node
, biasval
);
11021 mask_vectype
= truth_type_for (vectype
);
11022 final_mask
= build_minus_one_cst (mask_vectype
);
11027 if (final_len
&& final_mask
)
11029 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
11030 dataref_ptr
, vec_offset
,
11031 scale
, zero
, final_mask
,
11033 else if (final_mask
)
11034 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
11035 dataref_ptr
, vec_offset
,
11036 scale
, zero
, final_mask
);
11038 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
11039 dataref_ptr
, vec_offset
,
11041 gimple_call_set_nothrow (call
, true);
11043 data_ref
= NULL_TREE
;
11045 else if (gs_info
.decl
)
11047 /* The builtin decls path for gather is legacy, x86 only. */
11048 gcc_assert (!final_len
&& nunits
.is_constant ());
11051 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11053 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11054 stmt_info
, 0, vect_body
);
11057 poly_uint64 offset_nunits
11058 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
11059 if (known_eq (nunits
, offset_nunits
))
11061 new_stmt
= vect_build_one_gather_load_call
11062 (vinfo
, stmt_info
, gsi
, &gs_info
,
11063 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
11065 data_ref
= NULL_TREE
;
11067 else if (known_eq (nunits
, offset_nunits
* 2))
11069 /* We have a offset vector with half the number of
11070 lanes but the builtins will produce full vectype
11071 data with just the lower lanes filled. */
11072 new_stmt
= vect_build_one_gather_load_call
11073 (vinfo
, stmt_info
, gsi
, &gs_info
,
11074 dataref_ptr
, vec_offsets
[2 * vec_num
* j
+ 2 * i
],
11076 tree low
= make_ssa_name (vectype
);
11077 gimple_set_lhs (new_stmt
, low
);
11078 vect_finish_stmt_generation (vinfo
, stmt_info
,
11081 /* now put upper half of final_mask in final_mask low. */
11083 && !SCALAR_INT_MODE_P
11084 (TYPE_MODE (TREE_TYPE (final_mask
))))
11086 int count
= nunits
.to_constant ();
11087 vec_perm_builder
sel (count
, count
, 1);
11088 sel
.quick_grow (count
);
11089 for (int i
= 0; i
< count
; ++i
)
11090 sel
[i
] = i
| (count
/ 2);
11091 vec_perm_indices
indices (sel
, 2, count
);
11092 tree perm_mask
= vect_gen_perm_mask_checked
11093 (TREE_TYPE (final_mask
), indices
);
11094 new_stmt
= gimple_build_assign (NULL_TREE
,
11099 final_mask
= make_ssa_name (TREE_TYPE (final_mask
));
11100 gimple_set_lhs (new_stmt
, final_mask
);
11101 vect_finish_stmt_generation (vinfo
, stmt_info
,
11104 else if (final_mask
)
11106 new_stmt
= gimple_build_assign (NULL_TREE
,
11107 VEC_UNPACK_HI_EXPR
,
11109 final_mask
= make_ssa_name
11110 (truth_type_for (gs_info
.offset_vectype
));
11111 gimple_set_lhs (new_stmt
, final_mask
);
11112 vect_finish_stmt_generation (vinfo
, stmt_info
,
11116 new_stmt
= vect_build_one_gather_load_call
11117 (vinfo
, stmt_info
, gsi
, &gs_info
,
11119 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
11121 tree high
= make_ssa_name (vectype
);
11122 gimple_set_lhs (new_stmt
, high
);
11123 vect_finish_stmt_generation (vinfo
, stmt_info
,
11126 /* compose low + high. */
11127 int count
= nunits
.to_constant ();
11128 vec_perm_builder
sel (count
, count
, 1);
11129 sel
.quick_grow (count
);
11130 for (int i
= 0; i
< count
; ++i
)
11131 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
11132 vec_perm_indices
indices (sel
, 2, count
);
11134 = vect_gen_perm_mask_checked (vectype
, indices
);
11135 new_stmt
= gimple_build_assign (NULL_TREE
,
11137 low
, high
, perm_mask
);
11138 data_ref
= NULL_TREE
;
11140 else if (known_eq (nunits
* 2, offset_nunits
))
11142 /* We have a offset vector with double the number of
11143 lanes. Select the low/high part accordingly. */
11144 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
11145 if ((vec_num
* j
+ i
) & 1)
11147 int count
= offset_nunits
.to_constant ();
11148 vec_perm_builder
sel (count
, count
, 1);
11149 sel
.quick_grow (count
);
11150 for (int i
= 0; i
< count
; ++i
)
11151 sel
[i
] = i
| (count
/ 2);
11152 vec_perm_indices
indices (sel
, 2, count
);
11153 tree perm_mask
= vect_gen_perm_mask_checked
11154 (TREE_TYPE (vec_offset
), indices
);
11155 new_stmt
= gimple_build_assign (NULL_TREE
,
11160 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
11161 gimple_set_lhs (new_stmt
, vec_offset
);
11162 vect_finish_stmt_generation (vinfo
, stmt_info
,
11165 new_stmt
= vect_build_one_gather_load_call
11166 (vinfo
, stmt_info
, gsi
, &gs_info
,
11167 dataref_ptr
, vec_offset
, final_mask
);
11168 data_ref
= NULL_TREE
;
11171 gcc_unreachable ();
11175 /* Emulated gather-scatter. */
11176 gcc_assert (!final_mask
);
11177 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
11180 /* For emulated gathers N offset vector element
11181 offset add is consumed by the load). */
11182 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
11183 vec_to_scalar
, stmt_info
,
11185 /* N scalar loads plus gathering them into a
11188 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
11189 stmt_info
, 0, vect_body
);
11191 = record_stmt_cost (cost_vec
, 1, vec_construct
,
11192 stmt_info
, 0, vect_body
);
11195 unsigned HOST_WIDE_INT const_offset_nunits
11196 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
11198 vec
<constructor_elt
, va_gc
> *ctor_elts
;
11199 vec_alloc (ctor_elts
, const_nunits
);
11200 gimple_seq stmts
= NULL
;
11201 /* We support offset vectors with more elements
11202 than the data vector for now. */
11203 unsigned HOST_WIDE_INT factor
11204 = const_offset_nunits
/ const_nunits
;
11205 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
11206 unsigned elt_offset
11207 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
11208 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
11209 tree scale
= size_int (gs_info
.scale
);
11210 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
11211 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
11212 for (unsigned k
= 0; k
< const_nunits
; ++k
)
11214 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
11215 bitsize_int (k
+ elt_offset
));
11217 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
11218 vec_offset
, TYPE_SIZE (idx_type
), boff
);
11219 idx
= gimple_convert (&stmts
, sizetype
, idx
);
11220 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
11222 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
11223 TREE_TYPE (dataref_ptr
),
11225 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
11226 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
11227 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
11228 build_int_cst (ref_type
, 0));
11229 new_stmt
= gimple_build_assign (elt
, ref
);
11230 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
11231 gimple_seq_add_stmt (&stmts
, new_stmt
);
11232 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
11234 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
11235 new_stmt
= gimple_build_assign (
11236 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
11237 data_ref
= NULL_TREE
;
11240 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11241 /* DATA_REF is null if we've already built the statement. */
11244 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11245 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11247 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11248 gimple_set_lhs (new_stmt
, new_temp
);
11249 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11251 /* Store vector loads in the corresponding SLP_NODE. */
11253 slp_node
->push_vec_def (new_stmt
);
11256 if (!slp
&& !costing_p
)
11257 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11260 if (!slp
&& !costing_p
)
11261 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11263 if (costing_p
&& dump_enabled_p ())
11264 dump_printf_loc (MSG_NOTE
, vect_location
,
11265 "vect_model_load_cost: inside_cost = %u, "
11266 "prologue_cost = %u .\n",
11267 inside_cost
, prologue_cost
);
11271 poly_uint64 group_elt
= 0;
11272 unsigned int inside_cost
= 0, prologue_cost
= 0;
11273 /* For costing some adjacent vector loads, we'd like to cost with
11274 the total number of them once instead of cost each one by one. */
11275 unsigned int n_adjacent_loads
= 0;
11276 for (j
= 0; j
< ncopies
; j
++)
11278 /* 1. Create the vector or array pointer update chain. */
11279 if (j
== 0 && !costing_p
)
11281 bool simd_lane_access_p
11282 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
11283 if (simd_lane_access_p
11284 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
11285 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
11286 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
11287 && integer_zerop (DR_INIT (first_dr_info
->dr
))
11288 && alias_sets_conflict_p (get_alias_set (aggr_type
),
11289 get_alias_set (TREE_TYPE (ref_type
)))
11290 && (alignment_support_scheme
== dr_aligned
11291 || alignment_support_scheme
== dr_unaligned_supported
))
11293 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
11294 dataref_offset
= build_int_cst (ref_type
, 0);
11296 else if (diff_first_stmt_info
)
11299 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
11300 aggr_type
, at_loop
, offset
, &dummy
,
11301 gsi
, &ptr_incr
, simd_lane_access_p
,
11303 /* Adjust the pointer by the difference to first_stmt. */
11304 data_reference_p ptrdr
11305 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
11307 = fold_convert (sizetype
,
11308 size_binop (MINUS_EXPR
,
11309 DR_INIT (first_dr_info
->dr
),
11311 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11313 if (alignment_support_scheme
== dr_explicit_realign
)
11315 msq
= vect_setup_realignment (vinfo
,
11316 first_stmt_info_for_drptr
, gsi
,
11317 &realignment_token
,
11318 alignment_support_scheme
,
11319 dataref_ptr
, &at_loop
);
11320 gcc_assert (!compute_in_loop
);
11325 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
11327 offset
, &dummy
, gsi
, &ptr_incr
,
11328 simd_lane_access_p
, bump
);
11330 else if (!costing_p
)
11332 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
11333 if (dataref_offset
)
11334 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
11337 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11341 if (grouped_load
|| slp_perm
)
11342 dr_chain
.create (vec_num
);
11344 gimple
*new_stmt
= NULL
;
11345 for (i
= 0; i
< vec_num
; i
++)
11347 tree final_mask
= NULL_TREE
;
11348 tree final_len
= NULL_TREE
;
11349 tree bias
= NULL_TREE
;
11353 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11355 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11356 vec_num
* ncopies
, vectype
,
11359 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11360 final_mask
, vec_mask
, gsi
);
11363 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11364 gsi
, stmt_info
, bump
);
11367 /* 2. Create the vector-load in the loop. */
11368 switch (alignment_support_scheme
)
11371 case dr_unaligned_supported
:
11376 unsigned int misalign
;
11377 unsigned HOST_WIDE_INT align
;
11378 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
11379 if (alignment_support_scheme
== dr_aligned
)
11381 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
11384 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
11388 misalign
= misalignment
;
11389 if (dataref_offset
== NULL_TREE
11390 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
11391 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
11393 align
= least_bit_hwi (misalign
| align
);
11395 /* Compute IFN when LOOP_LENS or final_mask valid. */
11396 machine_mode vmode
= TYPE_MODE (vectype
);
11397 machine_mode new_vmode
= vmode
;
11398 internal_fn partial_ifn
= IFN_LAST
;
11401 opt_machine_mode new_ovmode
11402 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
11403 new_vmode
= new_ovmode
.require ();
11405 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
11406 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11407 vec_num
* ncopies
, vectype
,
11408 vec_num
* j
+ i
, factor
);
11410 else if (final_mask
)
11412 if (!can_vec_mask_load_store_p (
11413 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
11415 gcc_unreachable ();
11418 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11422 /* Pass VF value to 'len' argument of
11423 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11424 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11428 /* Pass all ones value to 'mask' argument of
11429 MASK_LEN_LOAD if final_mask is invalid. */
11430 mask_vectype
= truth_type_for (vectype
);
11431 final_mask
= build_minus_one_cst (mask_vectype
);
11436 signed char biasval
11437 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11439 bias
= build_int_cst (intQI_type_node
, biasval
);
11444 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11446 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11447 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
11449 final_mask
, final_len
,
11452 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
11455 gimple_call_set_nothrow (call
, true);
11457 data_ref
= NULL_TREE
;
11459 /* Need conversion if it's wrapped with VnQI. */
11460 if (vmode
!= new_vmode
)
11462 tree new_vtype
= build_vector_type_for_mode (
11463 unsigned_intQI_type_node
, new_vmode
);
11465 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11466 gimple_set_lhs (call
, var
);
11467 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11469 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11470 new_stmt
= gimple_build_assign (vec_dest
,
11471 VIEW_CONVERT_EXPR
, op
);
11474 else if (final_mask
)
11476 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11477 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11480 gimple_call_set_nothrow (call
, true);
11482 data_ref
= NULL_TREE
;
11486 tree ltype
= vectype
;
11487 tree new_vtype
= NULL_TREE
;
11488 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11489 unsigned int vect_align
11490 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11491 /* Try to use a single smaller load when we are about
11492 to load excess elements compared to the unrolled
11494 if (known_gt ((vec_num
* j
+ i
+ 1) * nunits
,
11495 (group_size
* vf
- gap
)))
11497 poly_uint64 remain
= ((group_size
* vf
- gap
)
11498 - (vec_num
* j
+ i
) * nunits
);
11499 if (known_ge ((vec_num
* j
+ i
+ 1) * nunits
11500 - (group_size
* vf
- gap
), nunits
))
11501 /* DR will be unused. */
11503 else if (known_ge (vect_align
,
11504 tree_to_poly_uint64
11505 (TYPE_SIZE_UNIT (vectype
))))
11506 /* Aligned access to excess elements is OK if
11507 at least one element is accessed in the
11510 else if (known_gt (vect_align
,
11512 * vect_get_scalar_dr_size
11514 /* Aligned access to the gap area when there's
11515 at least one element in it is OK. */
11519 /* remain should now be > 0 and < nunits. */
11521 if (constant_multiple_p (nunits
, remain
, &num
))
11525 = vector_vector_composition_type (vectype
,
11531 /* Else use multiple loads or a masked load? */
11532 /* For loop vectorization we now should have
11533 an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
11536 gcc_assert (new_vtype
11537 || LOOP_VINFO_PEELING_FOR_GAPS
11539 /* But still reduce the access size to the next
11540 required power-of-two so peeling a single
11541 scalar iteration is sufficient. */
11542 unsigned HOST_WIDE_INT cremain
;
11543 if (remain
.is_constant (&cremain
))
11545 unsigned HOST_WIDE_INT cpart_size
11546 = 1 << ceil_log2 (cremain
);
11547 if (known_gt (nunits
, cpart_size
)
11548 && constant_multiple_p (nunits
, cpart_size
,
11553 = vector_vector_composition_type (vectype
,
11563 = (dataref_offset
? dataref_offset
11564 : build_int_cst (ref_type
, 0));
11567 else if (ltype
!= vectype
11568 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11570 poly_uint64 gap_offset
11571 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype
))
11572 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype
)));
11573 tree gapcst
= build_int_cstu (ref_type
, gap_offset
);
11574 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11579 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11580 if (alignment_support_scheme
== dr_aligned
)
11583 TREE_TYPE (data_ref
)
11584 = build_aligned_type (TREE_TYPE (data_ref
),
11585 align
* BITS_PER_UNIT
);
11588 data_ref
= build_constructor (vectype
, NULL
);
11589 else if (ltype
!= vectype
)
11591 vect_copy_ref_info (data_ref
,
11592 DR_REF (first_dr_info
->dr
));
11593 tree tem
= make_ssa_name (ltype
);
11594 new_stmt
= gimple_build_assign (tem
, data_ref
);
11595 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11598 vec
<constructor_elt
, va_gc
> *v
;
11599 /* We've computed 'num' above to statically two
11600 or via constant_multiple_p. */
11602 = (exact_div (tree_to_poly_uint64
11603 (TYPE_SIZE_UNIT (vectype
)),
11604 tree_to_poly_uint64
11605 (TYPE_SIZE_UNIT (ltype
)))
11607 vec_alloc (v
, num
);
11608 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11611 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11612 build_zero_cst (ltype
));
11613 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11617 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11619 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11620 build_zero_cst (ltype
));
11622 gcc_assert (new_vtype
!= NULL_TREE
);
11623 if (new_vtype
== vectype
)
11624 new_stmt
= gimple_build_assign (
11625 vec_dest
, build_constructor (vectype
, v
));
11628 tree new_vname
= make_ssa_name (new_vtype
);
11629 new_stmt
= gimple_build_assign (
11630 new_vname
, build_constructor (new_vtype
, v
));
11631 vect_finish_stmt_generation (vinfo
, stmt_info
,
11633 new_stmt
= gimple_build_assign (
11635 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
11641 case dr_explicit_realign
:
11647 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11649 if (compute_in_loop
)
11650 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
11651 &realignment_token
,
11652 dr_explicit_realign
,
11653 dataref_ptr
, NULL
);
11655 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11656 ptr
= copy_ssa_name (dataref_ptr
);
11658 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11659 // For explicit realign the target alignment should be
11660 // known at compile time.
11661 unsigned HOST_WIDE_INT align
11662 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11663 new_stmt
= gimple_build_assign (
11664 ptr
, BIT_AND_EXPR
, dataref_ptr
,
11665 build_int_cst (TREE_TYPE (dataref_ptr
),
11666 -(HOST_WIDE_INT
) align
));
11667 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11669 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11670 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11671 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11672 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11673 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11674 gimple_assign_set_lhs (new_stmt
, new_temp
);
11675 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
11676 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11679 bump
= size_binop (MULT_EXPR
, vs
, TYPE_SIZE_UNIT (elem_type
));
11680 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
11681 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
, stmt_info
,
11683 new_stmt
= gimple_build_assign (
11684 NULL_TREE
, BIT_AND_EXPR
, ptr
,
11685 build_int_cst (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
11686 if (TREE_CODE (ptr
) == SSA_NAME
)
11687 ptr
= copy_ssa_name (ptr
, new_stmt
);
11689 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
11690 gimple_assign_set_lhs (new_stmt
, ptr
);
11691 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11693 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11696 case dr_explicit_realign_optimized
:
11700 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11701 new_temp
= copy_ssa_name (dataref_ptr
);
11703 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11704 // We should only be doing this if we know the target
11705 // alignment at compile time.
11706 unsigned HOST_WIDE_INT align
11707 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11708 new_stmt
= gimple_build_assign (
11709 new_temp
, BIT_AND_EXPR
, dataref_ptr
,
11710 build_int_cst (TREE_TYPE (dataref_ptr
),
11711 -(HOST_WIDE_INT
) align
));
11712 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11713 data_ref
= build2 (MEM_REF
, vectype
, new_temp
,
11714 build_int_cst (ref_type
, 0));
11718 gcc_unreachable ();
11721 /* One common place to cost the above vect load for different
11722 alignment support schemes. */
11725 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11726 only need to take care of the first stmt, whose
11727 stmt_info is first_stmt_info, vec_num iterating on it
11728 will cover the cost for the remaining, it's consistent
11729 with transforming. For the prologue cost for realign,
11730 we only need to count it once for the whole group. */
11731 bool first_stmt_info_p
= first_stmt_info
== stmt_info
;
11732 bool add_realign_cost
= first_stmt_info_p
&& i
== 0;
11733 if (memory_access_type
== VMAT_CONTIGUOUS
11734 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11735 || (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
11736 && (!grouped_load
|| first_stmt_info_p
)))
11738 /* Leave realign cases alone to keep them simple. */
11739 if (alignment_support_scheme
== dr_explicit_realign_optimized
11740 || alignment_support_scheme
== dr_explicit_realign
)
11741 vect_get_load_cost (vinfo
, stmt_info
, 1,
11742 alignment_support_scheme
, misalignment
,
11743 add_realign_cost
, &inside_cost
,
11744 &prologue_cost
, cost_vec
, cost_vec
,
11747 n_adjacent_loads
++;
11752 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11753 /* DATA_REF is null if we've already built the statement. */
11756 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11757 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11759 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11760 gimple_set_lhs (new_stmt
, new_temp
);
11761 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11764 /* 3. Handle explicit realignment if necessary/supported.
11766 vec_dest = realign_load (msq, lsq, realignment_token) */
11768 && (alignment_support_scheme
== dr_explicit_realign_optimized
11769 || alignment_support_scheme
== dr_explicit_realign
))
11771 lsq
= gimple_assign_lhs (new_stmt
);
11772 if (!realignment_token
)
11773 realignment_token
= dataref_ptr
;
11774 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11775 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
, msq
,
11776 lsq
, realignment_token
);
11777 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11778 gimple_assign_set_lhs (new_stmt
, new_temp
);
11779 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11781 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
11784 if (i
== vec_num
- 1 && j
== ncopies
- 1)
11785 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
11791 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11794 inside_cost
= record_stmt_cost (cost_vec
, 1, vec_perm
,
11795 stmt_info
, 0, vect_body
);
11798 tree perm_mask
= perm_mask_for_reverse (vectype
);
11799 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
11800 perm_mask
, stmt_info
, gsi
);
11801 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
11805 /* Collect vector loads and later create their permutation in
11806 vect_transform_grouped_load (). */
11807 if (!costing_p
&& (grouped_load
|| slp_perm
))
11808 dr_chain
.quick_push (new_temp
);
11810 /* Store vector loads in the corresponding SLP_NODE. */
11811 if (!costing_p
&& slp
&& !slp_perm
)
11812 slp_node
->push_vec_def (new_stmt
);
11814 /* With SLP permutation we load the gaps as well, without
11815 we need to skip the gaps after we manage to fully load
11816 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11817 group_elt
+= nunits
;
11819 && maybe_ne (group_gap_adj
, 0U)
11821 && known_eq (group_elt
, group_size
- group_gap_adj
))
11823 poly_wide_int bump_val
11824 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11825 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
)
11827 bump_val
= -bump_val
;
11828 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11829 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11834 /* Bump the vector pointer to account for a gap or for excess
11835 elements loaded for a permuted SLP load. */
11837 && maybe_ne (group_gap_adj
, 0U)
11840 poly_wide_int bump_val
11841 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11842 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
11843 bump_val
= -bump_val
;
11844 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11845 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11849 if (slp
&& !slp_perm
)
11855 /* For SLP we know we've seen all possible uses of dr_chain so
11856 direct vect_transform_slp_perm_load to DCE the unused parts.
11857 ??? This is a hack to prevent compile-time issues as seen
11858 in PR101120 and friends. */
11861 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, nullptr, vf
,
11862 true, &n_perms
, nullptr);
11863 inside_cost
= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
11864 stmt_info
, 0, vect_body
);
11868 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
11869 gsi
, vf
, false, &n_perms
,
11878 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11879 /* We assume that the cost of a single load-lanes instruction
11880 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11881 If a grouped access is instead being provided by a
11882 load-and-permute operation, include the cost of the
11884 if (costing_p
&& first_stmt_info
== stmt_info
)
11886 /* Uses an even and odd extract operations or shuffle
11887 operations for each needed permute. */
11888 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11889 int nstmts
= ceil_log2 (group_size
) * group_size
;
11890 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11891 stmt_info
, 0, vect_body
);
11893 if (dump_enabled_p ())
11894 dump_printf_loc (MSG_NOTE
, vect_location
,
11895 "vect_model_load_cost:"
11896 "strided group_size = %d .\n",
11899 else if (!costing_p
)
11901 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11903 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11906 else if (!costing_p
)
11907 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11909 dr_chain
.release ();
11911 if (!slp
&& !costing_p
)
11912 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11916 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11917 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11918 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11919 if (n_adjacent_loads
> 0)
11920 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
11921 alignment_support_scheme
, misalignment
, false,
11922 &inside_cost
, &prologue_cost
, cost_vec
, cost_vec
,
11924 if (dump_enabled_p ())
11925 dump_printf_loc (MSG_NOTE
, vect_location
,
11926 "vect_model_load_cost: inside_cost = %u, "
11927 "prologue_cost = %u .\n",
11928 inside_cost
, prologue_cost
);
11934 /* Function vect_is_simple_cond.
11937 LOOP - the loop that is being vectorized.
11938 COND - Condition that is checked for simple use.
11941 *COMP_VECTYPE - the vector type for the comparison.
11942 *DTS - The def types for the arguments of the comparison
11944 Returns whether a COND can be vectorized. Checks whether
11945 condition operands are supportable using vec_is_simple_use. */
11948 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
11949 slp_tree slp_node
, tree
*comp_vectype
,
11950 enum vect_def_type
*dts
, tree vectype
)
11953 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11957 if (TREE_CODE (cond
) == SSA_NAME
11958 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
11960 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
11961 &slp_op
, &dts
[0], comp_vectype
)
11963 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
11968 if (!COMPARISON_CLASS_P (cond
))
11971 lhs
= TREE_OPERAND (cond
, 0);
11972 rhs
= TREE_OPERAND (cond
, 1);
11974 if (TREE_CODE (lhs
) == SSA_NAME
)
11976 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
11977 &lhs
, &slp_op
, &dts
[0], &vectype1
))
11980 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
11981 || TREE_CODE (lhs
) == FIXED_CST
)
11982 dts
[0] = vect_constant_def
;
11986 if (TREE_CODE (rhs
) == SSA_NAME
)
11988 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
11989 &rhs
, &slp_op
, &dts
[1], &vectype2
))
11992 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
11993 || TREE_CODE (rhs
) == FIXED_CST
)
11994 dts
[1] = vect_constant_def
;
11998 if (vectype1
&& vectype2
11999 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12000 TYPE_VECTOR_SUBPARTS (vectype2
)))
12003 *comp_vectype
= vectype1
? vectype1
: vectype2
;
12004 /* Invariant comparison. */
12005 if (! *comp_vectype
)
12007 tree scalar_type
= TREE_TYPE (lhs
);
12008 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
12009 *comp_vectype
= truth_type_for (vectype
);
12012 /* If we can widen the comparison to match vectype do so. */
12013 if (INTEGRAL_TYPE_P (scalar_type
)
12015 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
12016 TYPE_SIZE (TREE_TYPE (vectype
))))
12017 scalar_type
= build_nonstandard_integer_type
12018 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
12019 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12027 /* vectorizable_condition.
12029 Check if STMT_INFO is conditional modify expression that can be vectorized.
12030 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12031 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12034 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12036 Return true if STMT_INFO is vectorizable in this way. */
12039 vectorizable_condition (vec_info
*vinfo
,
12040 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12042 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12044 tree scalar_dest
= NULL_TREE
;
12045 tree vec_dest
= NULL_TREE
;
12046 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
12047 tree then_clause
, else_clause
;
12048 tree comp_vectype
= NULL_TREE
;
12049 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
12050 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
12053 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12054 enum vect_def_type dts
[4]
12055 = {vect_unknown_def_type
, vect_unknown_def_type
,
12056 vect_unknown_def_type
, vect_unknown_def_type
};
12060 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12062 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12063 vec
<tree
> vec_oprnds0
= vNULL
;
12064 vec
<tree
> vec_oprnds1
= vNULL
;
12065 vec
<tree
> vec_oprnds2
= vNULL
;
12066 vec
<tree
> vec_oprnds3
= vNULL
;
12068 bool masked
= false;
12070 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12073 /* Is vectorizable conditional operation? */
12074 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12078 code
= gimple_assign_rhs_code (stmt
);
12079 if (code
!= COND_EXPR
)
12082 stmt_vec_info reduc_info
= NULL
;
12083 int reduc_index
= -1;
12084 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
12086 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
12089 if (slp_node
&& SLP_TREE_LANES (slp_node
) > 1)
12091 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
12092 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
12093 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
12094 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
12095 || reduc_index
!= -1);
12099 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12103 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12104 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12109 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
12113 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12117 gcc_assert (ncopies
>= 1);
12118 if (for_reduction
&& ncopies
> 1)
12119 return false; /* FORNOW */
12121 cond_expr
= gimple_assign_rhs1 (stmt
);
12123 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
12124 &comp_vectype
, &dts
[0], vectype
)
12128 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
12129 slp_tree then_slp_node
, else_slp_node
;
12130 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
12131 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
12133 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
12134 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
12137 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
12140 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
12143 masked
= !COMPARISON_CLASS_P (cond_expr
);
12144 vec_cmp_type
= truth_type_for (comp_vectype
);
12146 if (vec_cmp_type
== NULL_TREE
)
12149 cond_code
= TREE_CODE (cond_expr
);
12152 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
12153 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
12156 /* For conditional reductions, the "then" value needs to be the candidate
12157 value calculated by this iteration while the "else" value needs to be
12158 the result carried over from previous iterations. If the COND_EXPR
12159 is the other way around, we need to swap it. */
12160 bool must_invert_cmp_result
= false;
12161 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
12164 must_invert_cmp_result
= true;
12167 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
12168 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
12169 if (new_code
== ERROR_MARK
)
12170 must_invert_cmp_result
= true;
12173 cond_code
= new_code
;
12174 /* Make sure we don't accidentally use the old condition. */
12175 cond_expr
= NULL_TREE
;
12178 /* ??? The vectorized operand query below doesn't allow swapping
12179 this way for SLP. */
12182 std::swap (then_clause
, else_clause
);
12185 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
12187 /* Boolean values may have another representation in vectors
12188 and therefore we prefer bit operations over comparison for
12189 them (which also works for scalar masks). We store opcodes
12190 to use in bitop1 and bitop2. Statement is vectorized as
12191 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12192 depending on bitop1 and bitop2 arity. */
12196 bitop1
= BIT_NOT_EXPR
;
12197 bitop2
= BIT_AND_EXPR
;
12200 bitop1
= BIT_NOT_EXPR
;
12201 bitop2
= BIT_IOR_EXPR
;
12204 bitop1
= BIT_NOT_EXPR
;
12205 bitop2
= BIT_AND_EXPR
;
12206 std::swap (cond_expr0
, cond_expr1
);
12209 bitop1
= BIT_NOT_EXPR
;
12210 bitop2
= BIT_IOR_EXPR
;
12211 std::swap (cond_expr0
, cond_expr1
);
12214 bitop1
= BIT_XOR_EXPR
;
12217 bitop1
= BIT_XOR_EXPR
;
12218 bitop2
= BIT_NOT_EXPR
;
12223 cond_code
= SSA_NAME
;
12226 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
12227 && reduction_type
== EXTRACT_LAST_REDUCTION
12228 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
12230 if (dump_enabled_p ())
12231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12232 "reduction comparison operation not supported.\n");
12238 if (bitop1
!= NOP_EXPR
)
12240 machine_mode mode
= TYPE_MODE (comp_vectype
);
12243 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
12244 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12247 if (bitop2
!= NOP_EXPR
)
12249 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
12251 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12256 vect_cost_for_stmt kind
= vector_stmt
;
12257 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12258 /* Count one reduction-like operation per vector. */
12259 kind
= vec_to_scalar
;
12260 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
12262 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
12264 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
12269 && (!vect_maybe_update_slp_op_vectype
12270 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
12272 && !vect_maybe_update_slp_op_vectype
12273 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
12274 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
12275 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
12277 if (dump_enabled_p ())
12278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12279 "incompatible vector types for invariants\n");
12283 if (loop_vinfo
&& for_reduction
12284 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12286 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12288 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12289 vectype
, OPTIMIZE_FOR_SPEED
))
12290 vect_record_loop_len (loop_vinfo
,
12291 &LOOP_VINFO_LENS (loop_vinfo
),
12292 ncopies
* vec_num
, vectype
, 1);
12294 vect_record_loop_mask (loop_vinfo
,
12295 &LOOP_VINFO_MASKS (loop_vinfo
),
12296 ncopies
* vec_num
, vectype
, NULL
);
12298 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12299 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
12301 if (dump_enabled_p ())
12302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12303 "conditional reduction prevents the use"
12304 " of partial vectors.\n");
12305 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
12309 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
12310 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
12318 scalar_dest
= gimple_assign_lhs (stmt
);
12319 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12320 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
12322 bool swap_cond_operands
= false;
12324 /* See whether another part of the vectorized code applies a loop
12325 mask to the condition, or to its inverse. */
12327 vec_loop_masks
*masks
= NULL
;
12328 vec_loop_lens
*lens
= NULL
;
12329 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
12331 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12332 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12334 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
12336 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12337 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12340 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
12341 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12342 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12345 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
12346 tree_code orig_code
= cond
.code
;
12347 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
12348 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12350 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12351 cond_code
= cond
.code
;
12352 swap_cond_operands
= true;
12356 /* Try the inverse of the current mask. We check if the
12357 inverse mask is live and if so we generate a negate of
12358 the current mask such that we still honor NaNs. */
12359 cond
.inverted_p
= true;
12360 cond
.code
= orig_code
;
12361 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12363 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12364 cond_code
= cond
.code
;
12365 swap_cond_operands
= true;
12366 must_invert_cmp_result
= true;
12373 /* Handle cond expr. */
12375 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12376 cond_expr
, comp_vectype
, &vec_oprnds0
,
12377 then_clause
, vectype
, &vec_oprnds2
,
12378 reduction_type
!= EXTRACT_LAST_REDUCTION
12379 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12381 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12382 cond_expr0
, comp_vectype
, &vec_oprnds0
,
12383 cond_expr1
, comp_vectype
, &vec_oprnds1
,
12384 then_clause
, vectype
, &vec_oprnds2
,
12385 reduction_type
!= EXTRACT_LAST_REDUCTION
12386 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12388 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12389 vec_else_clause
= else_clause
;
12391 /* Arguments are ready. Create the new vector stmt. */
12392 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
12394 vec_then_clause
= vec_oprnds2
[i
];
12395 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12396 vec_else_clause
= vec_oprnds3
[i
];
12398 if (swap_cond_operands
)
12399 std::swap (vec_then_clause
, vec_else_clause
);
12402 vec_compare
= vec_cond_lhs
;
12405 vec_cond_rhs
= vec_oprnds1
[i
];
12406 if (bitop1
== NOP_EXPR
)
12408 gimple_seq stmts
= NULL
;
12409 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
12410 vec_cond_lhs
, vec_cond_rhs
);
12411 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
12415 new_temp
= make_ssa_name (vec_cmp_type
);
12417 if (bitop1
== BIT_NOT_EXPR
)
12418 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
12422 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
12424 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12425 if (bitop2
== NOP_EXPR
)
12426 vec_compare
= new_temp
;
12427 else if (bitop2
== BIT_NOT_EXPR
12428 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
12430 /* Instead of doing ~x ? y : z do x ? z : y. */
12431 vec_compare
= new_temp
;
12432 std::swap (vec_then_clause
, vec_else_clause
);
12436 vec_compare
= make_ssa_name (vec_cmp_type
);
12437 if (bitop2
== BIT_NOT_EXPR
)
12439 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
12442 = gimple_build_assign (vec_compare
, bitop2
,
12443 vec_cond_lhs
, new_temp
);
12444 vect_finish_stmt_generation (vinfo
, stmt_info
,
12450 /* If we decided to apply a loop mask to the result of the vector
12451 comparison, AND the comparison with the mask now. Later passes
12452 should then be able to reuse the AND results between mulitple
12456 for (int i = 0; i < 100; ++i)
12457 x[i] = y[i] ? z[i] : 10;
12459 results in following optimized GIMPLE:
12461 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12462 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12463 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12464 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12465 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12466 vect_iftmp.11_47, { 10, ... }>;
12468 instead of using a masked and unmasked forms of
12469 vec != { 0, ... } (masked in the MASK_LOAD,
12470 unmasked in the VEC_COND_EXPR). */
12472 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12473 in cases where that's necessary. */
12475 tree len
= NULL_TREE
, bias
= NULL_TREE
;
12476 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
12478 if (!is_gimple_val (vec_compare
))
12480 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12481 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12483 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12484 vec_compare
= vec_compare_name
;
12487 if (must_invert_cmp_result
)
12489 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12490 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12493 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12494 vec_compare
= vec_compare_name
;
12497 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12498 vectype
, OPTIMIZE_FOR_SPEED
))
12502 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
12503 vec_num
* ncopies
, vectype
, i
, 1);
12504 signed char biasval
12505 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
12506 bias
= build_int_cst (intQI_type_node
, biasval
);
12510 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
12511 bias
= build_int_cst (intQI_type_node
, 0);
12517 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
12519 tree tmp2
= make_ssa_name (vec_cmp_type
);
12521 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
12523 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
12524 vec_compare
= tmp2
;
12529 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12531 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
12532 tree lhs
= gimple_get_lhs (old_stmt
);
12533 if ((unsigned)i
!= vec_oprnds0
.length () - 1)
12534 lhs
= copy_ssa_name (lhs
);
12536 new_stmt
= gimple_build_call_internal
12537 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, vec_else_clause
, vec_compare
,
12538 vec_then_clause
, len
, bias
);
12540 new_stmt
= gimple_build_call_internal
12541 (IFN_FOLD_EXTRACT_LAST
, 3, vec_else_clause
, vec_compare
,
12543 gimple_call_set_lhs (new_stmt
, lhs
);
12544 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
12545 if ((unsigned)i
!= vec_oprnds0
.length () - 1)
12547 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12548 vec_else_clause
= lhs
;
12550 else if (old_stmt
== gsi_stmt (*gsi
))
12551 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
12554 /* In this case we're moving the definition to later in the
12555 block. That doesn't matter because the only uses of the
12556 lhs are in phi statements. */
12557 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12558 gsi_remove (&old_gsi
, true);
12559 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12564 new_temp
= make_ssa_name (vec_dest
);
12565 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12566 vec_then_clause
, vec_else_clause
);
12567 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12570 slp_node
->push_vec_def (new_stmt
);
12572 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12576 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12578 vec_oprnds0
.release ();
12579 vec_oprnds1
.release ();
12580 vec_oprnds2
.release ();
12581 vec_oprnds3
.release ();
12586 /* Helper of vectorizable_comparison.
12588 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12589 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12590 comparison, put it in VEC_STMT, and insert it at GSI.
12592 Return true if STMT_INFO is vectorizable in this way. */
12595 vectorizable_comparison_1 (vec_info
*vinfo
, tree vectype
,
12596 stmt_vec_info stmt_info
, tree_code code
,
12597 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12598 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12600 tree lhs
, rhs1
, rhs2
;
12601 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12602 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12604 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12605 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12607 poly_uint64 nunits
;
12609 enum tree_code bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12611 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12612 vec
<tree
> vec_oprnds0
= vNULL
;
12613 vec
<tree
> vec_oprnds1
= vNULL
;
12615 tree mask
= NULL_TREE
;
12617 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12620 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12623 mask_type
= vectype
;
12624 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12629 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12631 gcc_assert (ncopies
>= 1);
12633 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12636 slp_tree slp_rhs1
, slp_rhs2
;
12637 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12638 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12641 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12642 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12645 if (vectype1
&& vectype2
12646 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12647 TYPE_VECTOR_SUBPARTS (vectype2
)))
12650 vectype
= vectype1
? vectype1
: vectype2
;
12652 /* Invariant comparison. */
12655 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12656 vectype
= mask_type
;
12658 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12660 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12663 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12666 /* Can't compare mask and non-mask types. */
12667 if (vectype1
&& vectype2
12668 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
12671 /* Boolean values may have another representation in vectors
12672 and therefore we prefer bit operations over comparison for
12673 them (which also works for scalar masks). We store opcodes
12674 to use in bitop1 and bitop2. Statement is vectorized as
12675 BITOP2 (rhs1 BITOP1 rhs2) or
12676 rhs1 BITOP2 (BITOP1 rhs2)
12677 depending on bitop1 and bitop2 arity. */
12678 bool swap_p
= false;
12679 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12681 if (code
== GT_EXPR
)
12683 bitop1
= BIT_NOT_EXPR
;
12684 bitop2
= BIT_AND_EXPR
;
12686 else if (code
== GE_EXPR
)
12688 bitop1
= BIT_NOT_EXPR
;
12689 bitop2
= BIT_IOR_EXPR
;
12691 else if (code
== LT_EXPR
)
12693 bitop1
= BIT_NOT_EXPR
;
12694 bitop2
= BIT_AND_EXPR
;
12697 else if (code
== LE_EXPR
)
12699 bitop1
= BIT_NOT_EXPR
;
12700 bitop2
= BIT_IOR_EXPR
;
12705 bitop1
= BIT_XOR_EXPR
;
12706 if (code
== EQ_EXPR
)
12707 bitop2
= BIT_NOT_EXPR
;
12713 if (bitop1
== NOP_EXPR
)
12715 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12720 machine_mode mode
= TYPE_MODE (vectype
);
12723 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12724 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12727 if (bitop2
!= NOP_EXPR
)
12729 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12730 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12735 /* Put types on constant and invariant SLP children. */
12737 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12738 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12740 if (dump_enabled_p ())
12741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12742 "incompatible vector types for invariants\n");
12746 vect_model_simple_cost (vinfo
, stmt_info
,
12747 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12748 dts
, ndts
, slp_node
, cost_vec
);
12755 lhs
= gimple_get_lhs (STMT_VINFO_STMT (stmt_info
));
12757 mask
= vect_create_destination_var (lhs
, mask_type
);
12759 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12760 rhs1
, vectype
, &vec_oprnds0
,
12761 rhs2
, vectype
, &vec_oprnds1
);
12763 std::swap (vec_oprnds0
, vec_oprnds1
);
12765 /* Arguments are ready. Create the new vector stmt. */
12766 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12769 vec_rhs2
= vec_oprnds1
[i
];
12772 new_temp
= make_ssa_name (mask
);
12774 new_temp
= make_temp_ssa_name (mask_type
, NULL
, "cmp");
12775 if (bitop1
== NOP_EXPR
)
12777 new_stmt
= gimple_build_assign (new_temp
, code
,
12778 vec_rhs1
, vec_rhs2
);
12779 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12783 if (bitop1
== BIT_NOT_EXPR
)
12784 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12786 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12788 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12789 if (bitop2
!= NOP_EXPR
)
12791 tree res
= make_ssa_name (mask
);
12792 if (bitop2
== BIT_NOT_EXPR
)
12793 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12795 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12797 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12801 slp_node
->push_vec_def (new_stmt
);
12803 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12807 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12809 vec_oprnds0
.release ();
12810 vec_oprnds1
.release ();
12815 /* vectorizable_comparison.
12817 Check if STMT_INFO is comparison expression that can be vectorized.
12818 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12819 comparison, put it in VEC_STMT, and insert it at GSI.
12821 Return true if STMT_INFO is vectorizable in this way. */
12824 vectorizable_comparison (vec_info
*vinfo
,
12825 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12827 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12829 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12831 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12834 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12837 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12841 enum tree_code code
= gimple_assign_rhs_code (stmt
);
12842 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12843 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12844 vec_stmt
, slp_node
, cost_vec
))
12848 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
12853 /* Check to see if the current early break given in STMT_INFO is valid for
12857 vectorizable_early_exit (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12858 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12859 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12861 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12863 || !is_a
<gcond
*> (STMT_VINFO_STMT (stmt_info
)))
12866 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_condition_def
)
12869 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
12872 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12874 auto code
= gimple_cond_code (STMT_VINFO_STMT (stmt_info
));
12876 tree vectype
= NULL_TREE
;
12879 enum vect_def_type dt0
;
12880 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op0
, &slp_op0
, &dt0
,
12883 if (dump_enabled_p ())
12884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12885 "use not simple.\n");
12892 machine_mode mode
= TYPE_MODE (vectype
);
12898 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12900 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12901 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12902 bool masked_loop_p
= LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
12903 bool len_loop_p
= LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
12905 /* Now build the new conditional. Pattern gimple_conds get dropped during
12906 codegen so we must replace the original insn. */
12907 gimple
*orig_stmt
= STMT_VINFO_STMT (vect_orig_stmt (stmt_info
));
12908 gcond
*cond_stmt
= as_a
<gcond
*>(orig_stmt
);
12909 /* When vectorizing we assume that if the branch edge is taken that we're
12910 exiting the loop. This is not however always the case as the compiler will
12911 rewrite conditions to always be a comparison against 0. To do this it
12912 sometimes flips the edges. This is fine for scalar, but for vector we
12913 then have to flip the test, as we're still assuming that if you take the
12914 branch edge that we found the exit condition. i.e. we need to know whether
12915 we are generating a `forall` or an `exist` condition. */
12916 auto new_code
= NE_EXPR
;
12917 auto reduc_optab
= ior_optab
;
12918 auto reduc_op
= BIT_IOR_EXPR
;
12919 tree cst
= build_zero_cst (vectype
);
12920 edge exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 0);
12921 if (exit_true_edge
->flags
& EDGE_FALSE_VALUE
)
12922 exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 1);
12923 gcc_assert (exit_true_edge
->flags
& EDGE_TRUE_VALUE
);
12924 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
),
12925 exit_true_edge
->dest
))
12927 new_code
= EQ_EXPR
;
12928 reduc_optab
= and_optab
;
12929 reduc_op
= BIT_AND_EXPR
;
12930 cst
= build_minus_one_cst (vectype
);
12933 /* Analyze only. */
12936 if (direct_optab_handler (cbranch_optab
, mode
) == CODE_FOR_nothing
)
12938 if (dump_enabled_p ())
12939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12940 "can't vectorize early exit because the "
12941 "target doesn't support flag setting vector "
12947 && direct_optab_handler (reduc_optab
, mode
) == CODE_FOR_nothing
)
12949 if (dump_enabled_p ())
12950 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12951 "can't vectorize early exit because the "
12952 "target does not support boolean vector %s "
12954 reduc_optab
== ior_optab
? "OR" : "AND",
12959 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12960 vec_stmt
, slp_node
, cost_vec
))
12963 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12965 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN
, vectype
,
12966 OPTIMIZE_FOR_SPEED
))
12967 vect_record_loop_len (loop_vinfo
, lens
, ncopies
, vectype
, 1);
12969 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, NULL
);
12977 tree new_temp
= NULL_TREE
;
12978 gimple
*new_stmt
= NULL
;
12980 if (dump_enabled_p ())
12981 dump_printf_loc (MSG_NOTE
, vect_location
, "transform early-exit.\n");
12983 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12984 vec_stmt
, slp_node
, cost_vec
))
12985 gcc_unreachable ();
12987 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
12988 basic_block cond_bb
= gimple_bb (stmt
);
12989 gimple_stmt_iterator cond_gsi
= gsi_last_bb (cond_bb
);
12991 auto_vec
<tree
> stmts
;
12994 stmts
.safe_splice (SLP_TREE_VEC_DEFS (slp_node
));
12997 auto vec_stmts
= STMT_VINFO_VEC_STMTS (stmt_info
);
12998 stmts
.reserve_exact (vec_stmts
.length ());
12999 for (auto stmt
: vec_stmts
)
13000 stmts
.quick_push (gimple_assign_lhs (stmt
));
13003 /* Determine if we need to reduce the final value. */
13004 if (stmts
.length () > 1)
13006 /* We build the reductions in a way to maintain as much parallelism as
13008 auto_vec
<tree
> workset (stmts
.length ());
13010 /* Mask the statements as we queue them up. Normally we loop over
13011 vec_num, but since we inspect the exact results of vectorization
13012 we don't need to and instead can just use the stmts themselves. */
13014 for (unsigned i
= 0; i
< stmts
.length (); i
++)
13017 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
,
13020 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (stmt_mask
), stmt_mask
,
13021 stmts
[i
], &cond_gsi
);
13022 workset
.quick_push (stmt_mask
);
13024 else if (len_loop_p
)
13025 for (unsigned i
= 0; i
< stmts
.length (); i
++)
13027 tree len_mask
= vect_gen_loop_len_mask (loop_vinfo
, gsi
, &cond_gsi
,
13028 lens
, ncopies
, vectype
,
13031 workset
.quick_push (len_mask
);
13034 workset
.splice (stmts
);
13036 while (workset
.length () > 1)
13038 new_temp
= make_temp_ssa_name (vectype
, NULL
, "vexit_reduc");
13039 tree arg0
= workset
.pop ();
13040 tree arg1
= workset
.pop ();
13041 new_stmt
= gimple_build_assign (new_temp
, reduc_op
, arg0
, arg1
);
13042 vect_finish_stmt_generation (loop_vinfo
, stmt_info
, new_stmt
,
13044 workset
.quick_insert (0, new_temp
);
13049 new_temp
= stmts
[0];
13053 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
, 0);
13054 new_temp
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
13055 new_temp
, &cond_gsi
);
13057 else if (len_loop_p
)
13058 new_temp
= vect_gen_loop_len_mask (loop_vinfo
, gsi
, &cond_gsi
, lens
,
13059 ncopies
, vectype
, new_temp
, 0, 1);
13062 gcc_assert (new_temp
);
13064 gimple_cond_set_condition (cond_stmt
, new_code
, new_temp
, cst
);
13065 update_stmt (orig_stmt
);
13068 SLP_TREE_VEC_DEFS (slp_node
).truncate (0);
13070 STMT_VINFO_VEC_STMTS (stmt_info
).truncate (0);
13073 *vec_stmt
= orig_stmt
;
13078 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13079 can handle all live statements in the node. Otherwise return true
13080 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13081 VEC_STMT_P is as for vectorizable_live_operation. */
13084 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
13085 slp_tree slp_node
, slp_instance slp_node_instance
,
13087 stmt_vector_for_cost
*cost_vec
)
13089 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
13092 stmt_vec_info slp_stmt_info
;
13094 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
13097 && (STMT_VINFO_LIVE_P (slp_stmt_info
)
13099 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13100 && STMT_VINFO_DEF_TYPE (slp_stmt_info
)
13101 == vect_induction_def
))
13102 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
13103 slp_node_instance
, i
,
13104 vec_stmt_p
, cost_vec
))
13108 else if ((STMT_VINFO_LIVE_P (stmt_info
)
13109 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13110 && STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
))
13111 && !vectorizable_live_operation (vinfo
, stmt_info
,
13112 slp_node
, slp_node_instance
, -1,
13113 vec_stmt_p
, cost_vec
))
13119 /* Make sure the statement is vectorizable. */
13122 vect_analyze_stmt (vec_info
*vinfo
,
13123 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
13124 slp_tree node
, slp_instance node_instance
,
13125 stmt_vector_for_cost
*cost_vec
)
13127 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
13128 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
13130 gimple_seq pattern_def_seq
;
13132 if (dump_enabled_p ())
13133 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
13136 if (gimple_has_volatile_ops (stmt_info
->stmt
))
13137 return opt_result::failure_at (stmt_info
->stmt
,
13139 " stmt has volatile operands: %G\n",
13142 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13144 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
13146 gimple_stmt_iterator si
;
13148 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
13150 stmt_vec_info pattern_def_stmt_info
13151 = vinfo
->lookup_stmt (gsi_stmt (si
));
13152 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
13153 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
13155 /* Analyze def stmt of STMT if it's a pattern stmt. */
13156 if (dump_enabled_p ())
13157 dump_printf_loc (MSG_NOTE
, vect_location
,
13158 "==> examining pattern def statement: %G",
13159 pattern_def_stmt_info
->stmt
);
13162 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
13163 need_to_vectorize
, node
, node_instance
,
13171 /* Skip stmts that do not need to be vectorized. In loops this is expected
13173 - the COND_EXPR which is the loop exit condition
13174 - any LABEL_EXPRs in the loop
13175 - computations that are used only for array indexing or loop control.
13176 In basic blocks we only analyze statements that are a part of some SLP
13177 instance, therefore, all the statements are relevant.
13179 Pattern statement needs to be analyzed instead of the original statement
13180 if the original statement is not relevant. Otherwise, we analyze both
13181 statements. In basic blocks we are called from some SLP instance
13182 traversal, don't analyze pattern stmts instead, the pattern stmts
13183 already will be part of SLP instance. */
13185 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
13186 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
13187 && !STMT_VINFO_LIVE_P (stmt_info
))
13189 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13190 && pattern_stmt_info
13191 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13192 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13194 /* Analyze PATTERN_STMT instead of the original stmt. */
13195 stmt_info
= pattern_stmt_info
;
13196 if (dump_enabled_p ())
13197 dump_printf_loc (MSG_NOTE
, vect_location
,
13198 "==> examining pattern statement: %G",
13203 if (dump_enabled_p ())
13204 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
13206 return opt_result::success ();
13209 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13211 && pattern_stmt_info
13212 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13213 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13215 /* Analyze PATTERN_STMT too. */
13216 if (dump_enabled_p ())
13217 dump_printf_loc (MSG_NOTE
, vect_location
,
13218 "==> examining pattern statement: %G",
13219 pattern_stmt_info
->stmt
);
13222 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
13223 node_instance
, cost_vec
);
13228 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
13230 case vect_internal_def
:
13231 case vect_condition_def
:
13234 case vect_reduction_def
:
13235 case vect_nested_cycle
:
13236 gcc_assert (!bb_vinfo
13237 && (relevance
== vect_used_in_outer
13238 || relevance
== vect_used_in_outer_by_reduction
13239 || relevance
== vect_used_by_reduction
13240 || relevance
== vect_unused_in_scope
13241 || relevance
== vect_used_only_live
));
13244 case vect_double_reduction_def
:
13245 gcc_assert (!bb_vinfo
&& node
);
13248 case vect_induction_def
:
13249 case vect_first_order_recurrence
:
13250 gcc_assert (!bb_vinfo
);
13253 case vect_constant_def
:
13254 case vect_external_def
:
13255 case vect_unknown_def_type
:
13257 gcc_unreachable ();
13260 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13262 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
13264 if (STMT_VINFO_RELEVANT_P (stmt_info
))
13266 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
13267 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
13268 || gimple_code (stmt_info
->stmt
) == GIMPLE_COND
13269 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
13270 *need_to_vectorize
= true;
13273 if (PURE_SLP_STMT (stmt_info
) && !node
)
13275 if (dump_enabled_p ())
13276 dump_printf_loc (MSG_NOTE
, vect_location
,
13277 "handled only by SLP analysis\n");
13278 return opt_result::success ();
13283 && (STMT_VINFO_RELEVANT_P (stmt_info
)
13284 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
13285 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13286 -mveclibabi= takes preference over library functions with
13287 the simd attribute. */
13288 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13289 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
13291 || vectorizable_conversion (vinfo
, stmt_info
,
13292 NULL
, NULL
, node
, cost_vec
)
13293 || vectorizable_operation (vinfo
, stmt_info
,
13294 NULL
, NULL
, node
, cost_vec
)
13295 || vectorizable_assignment (vinfo
, stmt_info
,
13296 NULL
, NULL
, node
, cost_vec
)
13297 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13298 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13299 || vectorizable_lane_reducing (as_a
<loop_vec_info
> (vinfo
),
13300 stmt_info
, node
, cost_vec
)
13301 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13302 node
, node_instance
, cost_vec
)
13303 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13304 NULL
, node
, cost_vec
)
13305 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13306 || vectorizable_condition (vinfo
, stmt_info
,
13307 NULL
, NULL
, node
, cost_vec
)
13308 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13310 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13311 stmt_info
, NULL
, node
)
13312 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13313 stmt_info
, NULL
, node
, cost_vec
)
13314 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13319 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13320 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
13321 NULL
, NULL
, node
, cost_vec
)
13322 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
13324 || vectorizable_shift (vinfo
, stmt_info
,
13325 NULL
, NULL
, node
, cost_vec
)
13326 || vectorizable_operation (vinfo
, stmt_info
,
13327 NULL
, NULL
, node
, cost_vec
)
13328 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
13330 || vectorizable_load (vinfo
, stmt_info
,
13331 NULL
, NULL
, node
, cost_vec
)
13332 || vectorizable_store (vinfo
, stmt_info
,
13333 NULL
, NULL
, node
, cost_vec
)
13334 || vectorizable_condition (vinfo
, stmt_info
,
13335 NULL
, NULL
, node
, cost_vec
)
13336 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13338 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
)
13339 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13345 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13348 return opt_result::failure_at (stmt_info
->stmt
,
13350 " relevant stmt not supported: %G",
13353 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
13354 need extra handling, except for vectorizable reductions. */
13356 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
13357 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
13358 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
13359 stmt_info
, node
, node_instance
,
13361 return opt_result::failure_at (stmt_info
->stmt
,
13363 " live stmt not supported: %G",
13366 return opt_result::success ();
13370 /* Function vect_transform_stmt.
13372 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13375 vect_transform_stmt (vec_info
*vinfo
,
13376 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
13377 slp_tree slp_node
, slp_instance slp_node_instance
)
13379 bool is_store
= false;
13380 gimple
*vec_stmt
= NULL
;
13383 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
13385 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13387 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
13389 switch (STMT_VINFO_TYPE (stmt_info
))
13391 case type_demotion_vec_info_type
:
13392 case type_promotion_vec_info_type
:
13393 case type_conversion_vec_info_type
:
13394 done
= vectorizable_conversion (vinfo
, stmt_info
,
13395 gsi
, &vec_stmt
, slp_node
, NULL
);
13399 case induc_vec_info_type
:
13400 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
13401 stmt_info
, &vec_stmt
, slp_node
,
13406 case shift_vec_info_type
:
13407 done
= vectorizable_shift (vinfo
, stmt_info
,
13408 gsi
, &vec_stmt
, slp_node
, NULL
);
13412 case op_vec_info_type
:
13413 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13418 case assignment_vec_info_type
:
13419 done
= vectorizable_assignment (vinfo
, stmt_info
,
13420 gsi
, &vec_stmt
, slp_node
, NULL
);
13424 case load_vec_info_type
:
13425 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13430 case store_vec_info_type
:
13431 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
13433 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
13434 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
13435 /* In case of interleaving, the whole chain is vectorized when the
13436 last store in the chain is reached. Store stmts before the last
13437 one are skipped, and there vec_stmt_info shouldn't be freed
13442 done
= vectorizable_store (vinfo
, stmt_info
,
13443 gsi
, &vec_stmt
, slp_node
, NULL
);
13449 case condition_vec_info_type
:
13450 done
= vectorizable_condition (vinfo
, stmt_info
,
13451 gsi
, &vec_stmt
, slp_node
, NULL
);
13455 case comparison_vec_info_type
:
13456 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13461 case call_vec_info_type
:
13462 done
= vectorizable_call (vinfo
, stmt_info
,
13463 gsi
, &vec_stmt
, slp_node
, NULL
);
13466 case call_simd_clone_vec_info_type
:
13467 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13471 case reduc_vec_info_type
:
13472 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13473 gsi
, &vec_stmt
, slp_node
);
13477 case cycle_phi_info_type
:
13478 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13479 &vec_stmt
, slp_node
, slp_node_instance
);
13483 case lc_phi_info_type
:
13484 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13485 stmt_info
, &vec_stmt
, slp_node
);
13489 case recurr_info_type
:
13490 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13491 stmt_info
, &vec_stmt
, slp_node
, NULL
);
13495 case phi_info_type
:
13496 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
13500 case loop_exit_ctrl_vec_info_type
:
13501 done
= vectorizable_early_exit (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13507 if (!STMT_VINFO_LIVE_P (stmt_info
))
13509 if (dump_enabled_p ())
13510 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13511 "stmt not supported.\n");
13512 gcc_unreachable ();
13517 if (!slp_node
&& vec_stmt
)
13518 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
13520 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
13522 /* Handle stmts whose DEF is used outside the loop-nest that is
13523 being vectorized. */
13524 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
13525 slp_node_instance
, true, NULL
);
13530 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13536 /* Remove a group of stores (for SLP or interleaving), free their
13540 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
13542 stmt_vec_info next_stmt_info
= first_stmt_info
;
13544 while (next_stmt_info
)
13546 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
13547 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
13548 /* Free the attached stmt_vec_info and remove the stmt. */
13549 vinfo
->remove_stmt (next_stmt_info
);
13550 next_stmt_info
= tmp
;
13554 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13555 elements of type SCALAR_TYPE, or null if the target doesn't support
13558 If NUNITS is zero, return a vector type that contains elements of
13559 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13561 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13562 for this vectorization region and want to "autodetect" the best choice.
13563 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13564 and we want the new type to be interoperable with it. PREVAILING_MODE
13565 in this case can be a scalar integer mode or a vector mode; when it
13566 is a vector mode, the function acts like a tree-level version of
13567 related_vector_mode. */
13570 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
13571 tree scalar_type
, poly_uint64 nunits
)
13573 tree orig_scalar_type
= scalar_type
;
13574 scalar_mode inner_mode
;
13575 machine_mode simd_mode
;
13578 if ((!INTEGRAL_TYPE_P (scalar_type
)
13579 && !POINTER_TYPE_P (scalar_type
)
13580 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
13581 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
13582 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
13585 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
13587 /* Interoperability between modes requires one to be a constant multiple
13588 of the other, so that the number of vectors required for each operation
13589 is a compile-time constant. */
13590 if (prevailing_mode
!= VOIDmode
13591 && !constant_multiple_p (nunits
* nbytes
,
13592 GET_MODE_SIZE (prevailing_mode
))
13593 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
13597 /* For vector types of elements whose mode precision doesn't
13598 match their types precision we use a element type of mode
13599 precision. The vectorization routines will have to make sure
13600 they support the proper result truncation/extension.
13601 We also make sure to build vector types with INTEGER_TYPE
13602 component type only. */
13603 if (INTEGRAL_TYPE_P (scalar_type
)
13604 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
13605 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
13606 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
13607 TYPE_UNSIGNED (scalar_type
));
13609 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13610 When the component mode passes the above test simply use a type
13611 corresponding to that mode. The theory is that any use that
13612 would cause problems with this will disable vectorization anyway. */
13613 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
13614 && !INTEGRAL_TYPE_P (scalar_type
))
13615 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
13617 /* We can't build a vector type of elements with alignment bigger than
13619 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
13620 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
13621 TYPE_UNSIGNED (scalar_type
));
13623 /* If we felt back to using the mode fail if there was
13624 no scalar type for it. */
13625 if (scalar_type
== NULL_TREE
)
13628 /* If no prevailing mode was supplied, use the mode the target prefers.
13629 Otherwise lookup a vector mode based on the prevailing mode. */
13630 if (prevailing_mode
== VOIDmode
)
13632 gcc_assert (known_eq (nunits
, 0U));
13633 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
13634 if (SCALAR_INT_MODE_P (simd_mode
))
13636 /* Traditional behavior is not to take the integer mode
13637 literally, but simply to use it as a way of determining
13638 the vector size. It is up to mode_for_vector to decide
13639 what the TYPE_MODE should be.
13641 Note that nunits == 1 is allowed in order to support single
13642 element vector types. */
13643 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
13644 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13648 else if (SCALAR_INT_MODE_P (prevailing_mode
)
13649 || !related_vector_mode (prevailing_mode
,
13650 inner_mode
, nunits
).exists (&simd_mode
))
13652 /* Fall back to using mode_for_vector, mostly in the hope of being
13653 able to use an integer mode. */
13654 if (known_eq (nunits
, 0U)
13655 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
13658 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13662 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
13664 /* In cases where the mode was chosen by mode_for_vector, check that
13665 the target actually supports the chosen mode, or that it at least
13666 allows the vector mode to be replaced by a like-sized integer. */
13667 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
13668 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
13671 /* Re-attach the address-space qualifier if we canonicalized the scalar
13673 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
13674 return build_qualified_type
13675 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
13680 /* Function get_vectype_for_scalar_type.
13682 Returns the vector type corresponding to SCALAR_TYPE as supported
13683 by the target. If GROUP_SIZE is nonzero and we're performing BB
13684 vectorization, make sure that the number of elements in the vector
13685 is no bigger than GROUP_SIZE. */
13688 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13689 unsigned int group_size
)
13691 /* For BB vectorization, we should always have a group size once we've
13692 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13693 are tentative requests during things like early data reference
13694 analysis and pattern recognition. */
13695 if (is_a
<bb_vec_info
> (vinfo
))
13696 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
13700 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13702 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
13703 vinfo
->vector_mode
= TYPE_MODE (vectype
);
13705 /* Register the natural choice of vector type, before the group size
13706 has been applied. */
13708 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
13710 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13711 try again with an explicit number of elements. */
13714 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
13716 /* Start with the biggest number of units that fits within
13717 GROUP_SIZE and halve it until we find a valid vector type.
13718 Usually either the first attempt will succeed or all will
13719 fail (in the latter case because GROUP_SIZE is too small
13720 for the target), but it's possible that a target could have
13721 a hole between supported vector types.
13723 If GROUP_SIZE is not a power of 2, this has the effect of
13724 trying the largest power of 2 that fits within the group,
13725 even though the group is not a multiple of that vector size.
13726 The BB vectorizer will then try to carve up the group into
13728 unsigned int nunits
= 1 << floor_log2 (group_size
);
13731 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13732 scalar_type
, nunits
);
13735 while (nunits
> 1 && !vectype
);
13741 /* Return the vector type corresponding to SCALAR_TYPE as supported
13742 by the target. NODE, if nonnull, is the SLP tree node that will
13743 use the returned vector type. */
13746 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
13748 unsigned int group_size
= 0;
13750 group_size
= SLP_TREE_LANES (node
);
13751 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13754 /* Function get_mask_type_for_scalar_type.
13756 Returns the mask type corresponding to a result of comparison
13757 of vectors of specified SCALAR_TYPE as supported by target.
13758 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13759 make sure that the number of elements in the vector is no bigger
13760 than GROUP_SIZE. */
13763 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13764 unsigned int group_size
)
13766 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13771 return truth_type_for (vectype
);
13774 /* Function get_mask_type_for_scalar_type.
13776 Returns the mask type corresponding to a result of comparison
13777 of vectors of specified SCALAR_TYPE as supported by target.
13778 NODE, if nonnull, is the SLP tree node that will use the returned
13782 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13785 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, node
);
13790 return truth_type_for (vectype
);
13793 /* Function get_same_sized_vectype
13795 Returns a vector type corresponding to SCALAR_TYPE of size
13796 VECTOR_TYPE if supported by the target. */
13799 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
13801 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
13802 return truth_type_for (vector_type
);
13804 poly_uint64 nunits
;
13805 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
13806 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
13809 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
13810 scalar_type
, nunits
);
13813 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13814 would not change the chosen vector modes. */
13817 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
13819 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
13820 i
!= vinfo
->used_vector_modes
.end (); ++i
)
13821 if (!VECTOR_MODE_P (*i
)
13822 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
13827 /* Function vect_is_simple_use.
13830 VINFO - the vect info of the loop or basic block that is being vectorized.
13831 OPERAND - operand in the loop or bb.
13833 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13834 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13835 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13836 the definition could be anywhere in the function
13837 DT - the type of definition
13839 Returns whether a stmt with OPERAND can be vectorized.
13840 For loops, supportable operands are constants, loop invariants, and operands
13841 that are defined by the current iteration of the loop. Unsupportable
13842 operands are those that are defined by a previous iteration of the loop (as
13843 is the case in reduction/induction computations).
13844 For basic blocks, supportable operands are constants and bb invariants.
13845 For now, operands defined outside the basic block are not supported. */
13848 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13849 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
13851 if (def_stmt_info_out
)
13852 *def_stmt_info_out
= NULL
;
13854 *def_stmt_out
= NULL
;
13855 *dt
= vect_unknown_def_type
;
13857 if (dump_enabled_p ())
13859 dump_printf_loc (MSG_NOTE
, vect_location
,
13860 "vect_is_simple_use: operand ");
13861 if (TREE_CODE (operand
) == SSA_NAME
13862 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
13863 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
13865 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
13868 if (CONSTANT_CLASS_P (operand
))
13869 *dt
= vect_constant_def
;
13870 else if (is_gimple_min_invariant (operand
))
13871 *dt
= vect_external_def
;
13872 else if (TREE_CODE (operand
) != SSA_NAME
)
13873 *dt
= vect_unknown_def_type
;
13874 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
13875 *dt
= vect_external_def
;
13878 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
13879 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
13881 *dt
= vect_external_def
;
13884 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
13885 def_stmt
= stmt_vinfo
->stmt
;
13886 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
13887 if (def_stmt_info_out
)
13888 *def_stmt_info_out
= stmt_vinfo
;
13891 *def_stmt_out
= def_stmt
;
13894 if (dump_enabled_p ())
13896 dump_printf (MSG_NOTE
, ", type of def: ");
13899 case vect_uninitialized_def
:
13900 dump_printf (MSG_NOTE
, "uninitialized\n");
13902 case vect_constant_def
:
13903 dump_printf (MSG_NOTE
, "constant\n");
13905 case vect_external_def
:
13906 dump_printf (MSG_NOTE
, "external\n");
13908 case vect_internal_def
:
13909 dump_printf (MSG_NOTE
, "internal\n");
13911 case vect_induction_def
:
13912 dump_printf (MSG_NOTE
, "induction\n");
13914 case vect_reduction_def
:
13915 dump_printf (MSG_NOTE
, "reduction\n");
13917 case vect_double_reduction_def
:
13918 dump_printf (MSG_NOTE
, "double reduction\n");
13920 case vect_nested_cycle
:
13921 dump_printf (MSG_NOTE
, "nested cycle\n");
13923 case vect_first_order_recurrence
:
13924 dump_printf (MSG_NOTE
, "first order recurrence\n");
13926 case vect_condition_def
:
13927 dump_printf (MSG_NOTE
, "control flow\n");
13929 case vect_unknown_def_type
:
13930 dump_printf (MSG_NOTE
, "unknown\n");
13935 if (*dt
== vect_unknown_def_type
)
13937 if (dump_enabled_p ())
13938 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13939 "Unsupported pattern.\n");
13946 /* Function vect_is_simple_use.
13948 Same as vect_is_simple_use but also determines the vector operand
13949 type of OPERAND and stores it to *VECTYPE. If the definition of
13950 OPERAND is vect_uninitialized_def, vect_constant_def or
13951 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13952 is responsible to compute the best suited vector type for the
13956 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13957 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
13958 gimple
**def_stmt_out
)
13960 stmt_vec_info def_stmt_info
;
13962 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
13966 *def_stmt_out
= def_stmt
;
13967 if (def_stmt_info_out
)
13968 *def_stmt_info_out
= def_stmt_info
;
13970 /* Now get a vector type if the def is internal, otherwise supply
13971 NULL_TREE and leave it up to the caller to figure out a proper
13972 type for the use stmt. */
13973 if (*dt
== vect_internal_def
13974 || *dt
== vect_induction_def
13975 || *dt
== vect_reduction_def
13976 || *dt
== vect_double_reduction_def
13977 || *dt
== vect_nested_cycle
13978 || *dt
== vect_first_order_recurrence
)
13980 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
13981 gcc_assert (*vectype
!= NULL_TREE
);
13982 if (dump_enabled_p ())
13983 dump_printf_loc (MSG_NOTE
, vect_location
,
13984 "vect_is_simple_use: vectype %T\n", *vectype
);
13986 else if (*dt
== vect_uninitialized_def
13987 || *dt
== vect_constant_def
13988 || *dt
== vect_external_def
)
13989 *vectype
= NULL_TREE
;
13991 gcc_unreachable ();
13996 /* Function vect_is_simple_use.
13998 Same as vect_is_simple_use but determines the operand by operand
13999 position OPERAND from either STMT or SLP_NODE, filling in *OP
14000 and *SLP_DEF (when SLP_NODE is not NULL). */
14003 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
14004 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
14005 enum vect_def_type
*dt
,
14006 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
14010 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
14012 *vectype
= SLP_TREE_VECTYPE (child
);
14013 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
14015 /* ??? VEC_PERM nodes might be intermediate and their lane value
14016 have no representative (nor do we build a VEC_PERM stmt for
14017 the actual operation). Note for two-operator nodes we set
14018 a representative but leave scalar stmts empty as we'd only
14019 have one for a subset of lanes. Ideally no caller would
14020 require *op for internal defs. */
14021 if (SLP_TREE_REPRESENTATIVE (child
))
14023 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
14024 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
14028 gcc_assert (SLP_TREE_CODE (child
) == VEC_PERM_EXPR
);
14029 *op
= error_mark_node
;
14030 *dt
= vect_internal_def
;
14031 if (def_stmt_info_out
)
14032 *def_stmt_info_out
= NULL
;
14038 if (def_stmt_info_out
)
14039 *def_stmt_info_out
= NULL
;
14040 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
14041 *dt
= SLP_TREE_DEF_TYPE (child
);
14048 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
14050 if (gimple_assign_rhs_code (ass
) == COND_EXPR
14051 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
14054 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
14056 *op
= gimple_op (ass
, operand
);
14058 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
14059 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
14061 *op
= gimple_op (ass
, operand
+ 1);
14063 else if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
->stmt
))
14064 *op
= gimple_op (cond
, operand
);
14065 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
14066 *op
= gimple_call_arg (call
, operand
);
14068 gcc_unreachable ();
14069 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
14073 /* If OP is not NULL and is external or constant update its vector
14074 type with VECTYPE. Returns true if successful or false if not,
14075 for example when conflicting vector types are present. */
14078 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
14080 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
14082 if (SLP_TREE_VECTYPE (op
))
14083 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
14084 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
14085 should be handled by patters. Allow vect_constant_def for now. */
14086 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
14087 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
14089 SLP_TREE_VECTYPE (op
) = vectype
;
14093 /* Function supportable_widening_operation
14095 Check whether an operation represented by the code CODE is a
14096 widening operation that is supported by the target platform in
14097 vector form (i.e., when operating on arguments of type VECTYPE_IN
14098 producing a result of type VECTYPE_OUT).
14100 Widening operations we currently support are NOP (CONVERT), FLOAT,
14101 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14102 are supported by the target platform either directly (via vector
14103 tree-codes), or via target builtins.
14106 - CODE1 and CODE2 are codes of vector operations to be used when
14107 vectorizing the operation, if available.
14108 - MULTI_STEP_CVT determines the number of required intermediate steps in
14109 case of multi-step conversion (like char->short->int - in that case
14110 MULTI_STEP_CVT will be 1).
14111 - INTERM_TYPES contains the intermediate type required to perform the
14112 widening operation (short in the above example). */
/* NOTE(review): this extract appears lossy -- several original lines
   (conditions, braces, returns) are missing.  All tokens below are kept
   exactly as found; only comments were added or corrected.  */
14115 supportable_widening_operation (vec_info
*vinfo
,
14117 stmt_vec_info stmt_info
,
14118 tree vectype_out
, tree vectype_in
,
14119 code_helper
*code1
,
14120 code_helper
*code2
,
14121 int *multi_step_cvt
,
14122 vec
<tree
> *interm_types
)
14124 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
14125 class loop
*vect_loop
= NULL
;
14126 machine_mode vec_mode
;
14127 enum insn_code icode1
, icode2
;
14128 optab optab1
= unknown_optab
, optab2
= unknown_optab
;
14129 tree vectype
= vectype_in
;
14130 tree wide_vectype
= vectype_out
;
14131 tree_code c1
= MAX_TREE_CODES
, c2
= MAX_TREE_CODES
;
14133 tree prev_type
, intermediate_type
;
14134 machine_mode intermediate_mode
, prev_mode
;
14135 optab optab3
, optab4
;
14137 *multi_step_cvt
= 0;
14139 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Select the pair of vector tree codes C1/C2 implementing CODE; the
   cases below assign a lo/hi (or even/odd) variant pair.  */
14141 switch (code
.safe_as_tree_code ())
14143 case MAX_TREE_CODES
:
14144 /* Don't set c1 and c2 if code is not a tree_code. */
14147 case WIDEN_MULT_EXPR
:
14148 /* The result of a vectorized widening operation usually requires
14149 two vectors (because the widened results do not fit into one vector).
14150 The generated vector results would normally be expected to be
14151 generated in the same order as in the original scalar computation,
14152 i.e. if 8 results are generated in each vector iteration, they are
14153 to be organized as follows:
14154 vect1: [res1,res2,res3,res4],
14155 vect2: [res5,res6,res7,res8].
14157 However, in the special case that the result of the widening
14158 operation is used in a reduction computation only, the order doesn't
14159 matter (because when vectorizing a reduction we change the order of
14160 the computation). Some targets can take advantage of this and
14161 generate more efficient code. For example, targets like Altivec,
14162 that support widen_mult using a sequence of {mult_even,mult_odd}
14163 generate the following vectors:
14164 vect1: [res1,res3,res5,res7],
14165 vect2: [res2,res4,res6,res8].
14167 When vectorizing outer-loops, we execute the inner-loop sequentially
14168 (each vectorized inner-loop iteration contributes to VF outer-loop
14169 iterations in parallel). We therefore don't allow to change the
14170 order of the computation in the inner-loop during outer-loop
14172 /* TODO: Another case in which order doesn't *really* matter is when we
14173 widen and then contract again, e.g. (short)((int)x * y >> 8).
14174 Normally, pack_trunc performs an even/odd permute, whereas the
14175 repack from an even/odd expansion would be an interleave, which
14176 would be significantly simpler for e.g. AVX2. */
14177 /* In any case, in order to avoid duplicating the code below, recurse
14178 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14179 are properly set up for the caller. If we fail, we'll continue with
14180 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14182 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
14183 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
14184 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
14185 stmt_info
, vectype_out
,
14187 code2
, multi_step_cvt
,
14190 /* Elements in a vector with vect_used_by_reduction property cannot
14191 be reordered if the use chain with this property does not have the
14192 same operation. One such example is s += a * b, where elements
14193 in a and b cannot be reordered. Here we check if the vector defined
14194 by STMT is only directly used in the reduction statement. */
14195 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
14196 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
14198 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
14201 c1
= VEC_WIDEN_MULT_LO_EXPR
;
14202 c2
= VEC_WIDEN_MULT_HI_EXPR
;
14205 case DOT_PROD_EXPR
:
14206 c1
= DOT_PROD_EXPR
;
14207 c2
= DOT_PROD_EXPR
;
14215 case VEC_WIDEN_MULT_EVEN_EXPR
:
14216 /* Support the recursion induced just above. */
14217 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
14218 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
14221 case WIDEN_LSHIFT_EXPR
:
14222 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
14223 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
14227 c1
= VEC_UNPACK_LO_EXPR
;
14228 c2
= VEC_UNPACK_HI_EXPR
;
14232 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
14233 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
14236 case FIX_TRUNC_EXPR
:
14237 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
14238 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
14242 gcc_unreachable ();
/* Big-endian targets swap the lo/hi pair; the even/odd form is
   excluded, presumably because it does not depend on lane order.  */
14245 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
14246 std::swap (c1
, c2
);
14248 if (code
== FIX_TRUNC_EXPR
)
14250 /* The signedness is determined from output operand. */
14251 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14252 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
14254 else if (CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ())
14255 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
14256 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14257 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
14258 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
14260 /* If the input and result modes are the same, a different optab
14261 is needed where we pass in the number of units in vectype. */
14262 optab1
= vec_unpacks_sbool_lo_optab
;
14263 optab2
= vec_unpacks_sbool_hi_optab
;
14266 vec_mode
= TYPE_MODE (vectype
);
/* Internal-fn widening ops: prefer a low/high split, falling back to
   an even/odd split if the target lacks lo/hi handlers (see below).  */
14267 if (widening_fn_p (code
))
14269 /* If this is an internal fn then we must check whether the target
14270 supports either a low-high split or an even-odd split. */
14271 internal_fn ifn
= as_internal_fn ((combined_fn
) code
);
14273 internal_fn lo
, hi
, even
, odd
;
14274 lookup_hilo_internal_fn (ifn
, &lo
, &hi
);
14275 *code1
= as_combined_fn (lo
);
14276 *code2
= as_combined_fn (hi
);
14277 optab1
= direct_internal_fn_optab (lo
, {vectype
, vectype
});
14278 optab2
= direct_internal_fn_optab (hi
, {vectype
, vectype
});
14280 /* If we don't support low-high, then check for even-odd. */
14282 || (icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
14284 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
14286 lookup_evenodd_internal_fn (ifn
, &even
, &odd
);
14287 *code1
= as_combined_fn (even
);
14288 *code2
= as_combined_fn (odd
);
14289 optab1
= direct_internal_fn_optab (even
, {vectype
, vectype
});
14290 optab2
= direct_internal_fn_optab (odd
, {vectype
, vectype
});
14293 else if (code
.is_tree_code ())
14295 if (code
== FIX_TRUNC_EXPR
)
14297 /* The signedness is determined from output operand. */
14298 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14299 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
14301 else if (CONVERT_EXPR_CODE_P ((tree_code
) code
.safe_as_tree_code ())
14302 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
14303 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14304 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
14305 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
14307 /* If the input and result modes are the same, a different optab
14308 is needed where we pass in the number of units in vectype. */
14309 optab1
= vec_unpacks_sbool_lo_optab
;
14310 optab2
= vec_unpacks_sbool_hi_optab
;
14314 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14315 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
/* Both optabs and both insn handlers must exist for a single-step
   widening; otherwise fall through to the multi-step search below.  */
14321 if (!optab1
|| !optab2
)
14324 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
14325 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
14329 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
14330 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
14332 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14334 /* For scalar masks we may have different boolean
14335 vector types having the same QImode. Thus we
14336 add additional check for elements number. */
14337 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
14338 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
14342 /* Check if it's a multi-step conversion that can be done using intermediate
14345 prev_type
= vectype
;
14346 prev_mode
= vec_mode
;
14348 if (!CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ()))
14351 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14352 intermediate steps in promotion sequence. We try
14353 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.
14355 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14356 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14358 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14359 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14361 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
14362 else if (VECTOR_MODE_P (intermediate_mode
))
14364 tree intermediate_element_type
14365 = lang_hooks
.types
.type_for_mode (GET_MODE_INNER (intermediate_mode
),
14366 TYPE_UNSIGNED (prev_type
));
14368 = build_vector_type_for_mode (intermediate_element_type
,
14369 intermediate_mode
);
14373 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
14374 TYPE_UNSIGNED (prev_type
));
14376 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14377 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14378 && intermediate_mode
== prev_mode
14379 && SCALAR_INT_MODE_P (prev_mode
))
14381 /* If the input and result modes are the same, a different optab
14382 is needed where we pass in the number of units in vectype. */
14383 optab3
= vec_unpacks_sbool_lo_optab
;
14384 optab4
= vec_unpacks_sbool_hi_optab
;
14388 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14389 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
14392 if (!optab3
|| !optab4
14393 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
14394 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14395 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
14396 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
14397 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
14398 == CODE_FOR_nothing
)
14399 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
14400 == CODE_FOR_nothing
))
14403 interm_types
->quick_push (intermediate_type
);
14404 (*multi_step_cvt
)++;
/* Stop once the intermediate result reaches the wide vector mode,
   with the extra subparts check for boolean vectors (see above).  */
14406 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
14407 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
14409 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14411 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
14412 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
14416 prev_type
= intermediate_type
;
14417 prev_mode
= intermediate_mode
;
/* No chain of at most MAX_INTERM_CVT_STEPS steps worked: discard any
   intermediate types collected so far.  */
14420 interm_types
->release ();
14425 /* Function supportable_narrowing_operation
14427 Check whether an operation represented by the code CODE is a
14428 narrowing operation that is supported by the target platform in
14429 vector form (i.e., when operating on arguments of type VECTYPE_IN
14430 and producing a result of type VECTYPE_OUT).
14432 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14433 and FLOAT. This function checks if these operations are supported by
14434 the target platform directly via vector tree-codes.
14437 - CODE1 is the code of a vector operation to be used when
14438 vectorizing the operation, if available.
14439 - MULTI_STEP_CVT determines the number of required intermediate steps in
14440 case of multi-step conversion (like int->short->char - in that case
14441 MULTI_STEP_CVT will be 1).
14442 - INTERM_TYPES contains the intermediate type required to perform the
14443 narrowing operation (short in the above example). */
/* NOTE(review): lossy extraction -- some original lines are missing.
   Tokens kept exactly as found; only comments added or corrected.  */
14446 supportable_narrowing_operation (code_helper code
,
14447 tree vectype_out
, tree vectype_in
,
14448 code_helper
*code1
, int *multi_step_cvt
,
14449 vec
<tree
> *interm_types
)
14451 machine_mode vec_mode
;
14452 enum insn_code icode1
;
14453 optab optab1
, interm_optab
;
14454 tree vectype
= vectype_in
;
14455 tree narrow_vectype
= vectype_out
;
14457 tree intermediate_type
, prev_type
;
14458 machine_mode intermediate_mode
, prev_mode
;
14460 unsigned HOST_WIDE_INT n_elts
;
/* Only tree codes are handled here (no internal fns).  */
14463 if (!code
.is_tree_code ())
14466 *multi_step_cvt
= 0;
/* Pick the vector pack code C1 and its optab for CODE.  */
14467 switch ((tree_code
) code
)
14470 c1
= VEC_PACK_TRUNC_EXPR
;
/* Sub-byte boolean vectors use the special sbool optab, which also
   takes the number of elements.  */
14471 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
14472 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14473 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
14474 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
14475 && n_elts
< BITS_PER_UNIT
)
14476 optab1
= vec_pack_sbool_trunc_optab
;
14478 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14481 case FIX_TRUNC_EXPR
:
14482 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
14483 /* The signedness is determined from output operand. */
14484 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14488 c1
= VEC_PACK_FLOAT_EXPR
;
14489 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14493 gcc_unreachable ();
14499 vec_mode
= TYPE_MODE (vectype
);
14500 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
14505 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14507 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14509 /* For scalar masks we may have different boolean
14510 vector types having the same QImode. Thus we
14511 add additional check for elements number. */
14512 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
14513 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14517 if (code
== FLOAT_EXPR
)
14520 /* Check if it's a multi-step conversion that can be done using intermediate
14522 prev_mode
= vec_mode
;
14523 prev_type
= vectype
;
/* UNS tracks the signedness used for the intermediate scalar types.  */
14524 if (code
== FIX_TRUNC_EXPR
)
14525 uns
= TYPE_UNSIGNED (vectype_out
)
;
14527 uns
= TYPE_UNSIGNED (vectype
);
14529 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14530 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14531 costly than signed. */
14532 if (code
== FIX_TRUNC_EXPR
&& uns
)
14534 enum insn_code icode2
;
14537 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
14539 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14540 if (interm_optab
!= unknown_optab
14541 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
14542 && insn_data
[icode1
].operand
[0].mode
14543 == insn_data
[icode2
].operand
[0].mode
)
14546 optab1
= interm_optab
;
14551 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14552 intermediate steps in promotion sequence. We try
14553 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14554 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14555 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14557 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14558 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14560 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
14563 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
/* As in the single-step case, sub-byte boolean vectors need the
   sbool pack optab.  */
14564 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14565 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14566 && SCALAR_INT_MODE_P (prev_mode
)
14567 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
14568 && n_elts
< BITS_PER_UNIT
)
14569 interm_optab
= vec_pack_sbool_trunc_optab
;
14572 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
14575 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
14576 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14577 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
14578 == CODE_FOR_nothing
))
14581 interm_types
->quick_push (intermediate_type
);
14582 (*multi_step_cvt
)++;
/* Done once the packed result reaches the narrow vector mode (with
   the boolean subparts check, see above).  */
14584 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14586 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14588 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
14589 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14593 prev_mode
= intermediate_mode
;
14594 prev_type
= intermediate_type
;
14595 optab1
= interm_optab
;
/* No usable chain found: drop the collected intermediate types.  */
14598 interm_types
->release ();
14602 /* Function supportable_indirect_convert_operation
14604 Check whether an operation represented by the code CODE is single or multi
14605 operations that are supported by the target platform in
14606 vector form (i.e., when operating on arguments of type VECTYPE_IN
14607 producing a result of type VECTYPE_OUT).
14609 Convert operations we currently support directly are FIX_TRUNC and FLOAT.
14610 This function checks if these operations are supported
14611 by the target platform directly (via vector tree-codes).
14614 - converts contains some pairs to perform the convert operation,
14615 the pair's first is the intermediate type, and its second is the code of
14616 a vector operation to be used when converting the operation from the
14617 previous type to the intermediate type. */
/* NOTE(review): lossy extraction -- some original lines are missing.
   Tokens kept exactly as found; only comments added or corrected.  */
14619 supportable_indirect_convert_operation (code_helper code
,
14622 vec
<std::pair
<tree
, tree_code
> > *converts
,
14625 bool found_mode
= false;
14626 scalar_mode lhs_mode
= GET_MODE_INNER (TYPE_MODE (vectype_out
));
14627 scalar_mode rhs_mode
= GET_MODE_INNER (TYPE_MODE (vectype_in
));
14628 opt_scalar_mode mode_iter
;
14629 tree_code tc1
, tc2
, code1
, code2
;
14631 tree cvt_type
= NULL_TREE
;
14632 poly_uint64 nelts
= TYPE_VECTOR_SUBPARTS (vectype_in
);
/* First try a direct, single-step conversion.  */
14634 if (supportable_convert_operation ((tree_code
) code
,
14639 converts
->safe_push (std::make_pair (vectype_out
, tc1
));
14643 /* For conversions between float and integer types try whether
14644 we can use intermediate signed integer types to support the
14646 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
14647 && (code
== FLOAT_EXPR
14648 || (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
14650 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
14651 bool float_expr_p
= code
== FLOAT_EXPR
;
14652 unsigned short target_size
;
14653 scalar_mode intermediate_mode
;
14656 intermediate_mode
= lhs_mode
;
14657 target_size
= GET_MODE_SIZE (rhs_mode
);
14661 target_size
= GET_MODE_SIZE (lhs_mode
);
14662 if (!int_mode_for_size
14663 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
/* For FLOAT_EXPR do int->int first then int->float; for
   FIX_TRUNC_EXPR do float->int first then int->int.  */
14666 code1
= float_expr_p
? (tree_code
) code
: NOP_EXPR
;
14667 code2
= float_expr_p
? NOP_EXPR
: (tree_code
) code
;
14668 opt_scalar_mode mode_iter
;
/* Search increasingly wide integer modes for a usable intermediate
   type, up to TARGET_SIZE (size of the larger operand mode).  */
14669 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
14671 intermediate_mode
= mode_iter
.require ();
14673 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
14676 scalar_mode cvt_mode
;
14677 if (!int_mode_for_size
14678 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
14681 cvt_type
= build_nonstandard_integer_type
14682 (GET_MODE_BITSIZE (cvt_mode
), 0);
14684 /* Check if the intermediate type can hold OP0's range.
14685 When converting from float to integer this is not necessary
14686 because values that do not fit the (smaller) target type are
14687 unspecified anyway. */
14688 if (demotion
&& float_expr_p
)
14690 wide_int op_min_value
, op_max_value
;
14691 /* For vector form, it looks like op0 doesn't have RANGE_INFO.
14692 In the future, if it is supported, changes may need to be made
14693 to this part, such as checking the RANGE of each element
14695 if ((TREE_CODE (op0
) == SSA_NAME
&& !SSA_NAME_RANGE_INFO (op0
))
14696 || !vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
14699 if (cvt_type
== NULL_TREE
14700 || (wi::min_precision (op_max_value
, SIGNED
)
14701 > TYPE_PRECISION (cvt_type
))
14702 || (wi::min_precision (op_min_value
, SIGNED
)
14703 > TYPE_PRECISION (cvt_type
)))
14707 cvt_type
= get_related_vectype_for_scalar_type (TYPE_MODE (vectype_in
),
14710 /* This should only happen for SLP as long as loop vectorizer
14711 only supports same-sized vector. */
14712 if (cvt_type
== NULL_TREE
14713 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nelts
)
14714 || !supportable_convert_operation ((tree_code
) code1
,
14717 || !supportable_convert_operation ((tree_code
) code2
,
/* Record the two-step chain: first to CVT_TYPE, then (if it is a
   distinct mode) to VECTYPE_OUT.  */
14728 converts
->safe_push (std::make_pair (cvt_type
, tc2
));
14729 if (TYPE_MODE (cvt_type
) != TYPE_MODE (vectype_out
))
14730 converts
->safe_push (std::make_pair (vectype_out
, tc1
));
14737 /* Generate and return a vector mask of MASK_TYPE such that
14738 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14739 Add the statements to SEQ. */
/* Build an IFN_WHILE_ULT call computing the while-mask of MASK_TYPE
   from START_INDEX/END_INDEX and append it to SEQ; the target must
   support the ifn (enforced by the gcc_checking_assert below).
   NOTE(review): the guard choosing between make_temp_ssa_name and
   make_ssa_name (presumably "if (name)") appears lost in extraction.  */
14742 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
14743 tree end_index
, const char *name
)
14745 tree cmp_type
= TREE_TYPE (start_index
);
14746 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
14747 cmp_type
, mask_type
,
14748 OPTIMIZE_FOR_SPEED
));
14749 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
14750 start_index
, end_index
,
14751 build_zero_cst (mask_type
));
14754 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
14756 tmp
= make_ssa_name (mask_type
);
/* The mask SSA name receives the call result.  */
14757 gimple_call_set_lhs (call
, tmp
);
14758 gimple_seq_add_stmt (seq
, call
);
14762 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14763 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
/* Build the WHILE_ULT mask via vect_gen_while and return its bitwise
   inversion, appending all statements to SEQ.  */
14766 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
14769 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
14770 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
14773 /* Try to compute the vector types required to vectorize STMT_INFO,
14774 returning true on success and false if vectorization isn't possible.
14775 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14776 make sure that the number of elements in the vectors is no bigger
14781 - Set *STMT_VECTYPE_OUT to:
14782 - NULL_TREE if the statement doesn't need to be vectorized;
14783 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14785 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14786 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14787 statement does not help to determine the overall number of units. */
/* NOTE(review): lossy extraction -- some original lines are missing.
   Tokens kept exactly as found; only comments added.  */
14790 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
14791 tree
*stmt_vectype_out
,
14792 tree
*nunits_vectype_out
,
14793 unsigned int group_size
)
14795 gimple
*stmt
= stmt_info
->stmt
;
14797 /* For BB vectorization, we should always have a group size once we've
14798 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14799 are tentative requests during things like early data reference
14800 analysis and pattern recognition. */
14801 if (is_a
<bb_vec_info
> (vinfo
))
14802 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
14806 *stmt_vectype_out
= NULL_TREE
;
14807 *nunits_vectype_out
= NULL_TREE
;
/* Reject statements with no lhs, except the explicitly allowed
   vector conditionals and MASK_STOREs.  */
14809 if (gimple_get_lhs (stmt
) == NULL_TREE
14810 /* Allow vector conditionals through here. */
14811 && !is_a
<gcond
*> (stmt
)
14812 /* MASK_STORE has no lhs, but is ok. */
14813 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14815 if (is_a
<gcall
*> (stmt
))
14817 /* Ignore calls with no lhs. These must be calls to
14818 #pragma omp simd functions, and what vectorization factor
14819 it really needs can't be determined until
14820 vectorizable_simd_clone_call. */
14821 if (dump_enabled_p ())
14822 dump_printf_loc (MSG_NOTE
, vect_location
,
14823 "defer to SIMD clone analysis.\n");
14824 return opt_result::success ();
14827 return opt_result::failure_at (stmt
,
14828 "not vectorized: irregular stmt: %G", stmt
);
/* Determine VECTYPE: reuse a precomputed one, build a mask type for
   mask-producing statements, or derive it from the scalar type.  */
14832 tree scalar_type
= NULL_TREE
;
14833 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
14835 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
14836 if (dump_enabled_p ())
14837 dump_printf_loc (MSG_NOTE
, vect_location
,
14838 "precomputed vectype: %T\n", vectype
);
14840 else if (vect_use_mask_type_p (stmt_info
))
14842 unsigned int precision
= stmt_info
->mask_precision
;
14843 scalar_type
= build_nonstandard_integer_type (precision
, 1);
14844 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
14846 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
14847 " data-type %T\n", scalar_type
);
14848 if (dump_enabled_p ())
14849 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
14853 /* If we got here with a gcond it means that the target had no available vector
14854 mode for the scalar type. We can't vectorize so abort. */
14855 if (is_a
<gcond
*> (stmt
))
14856 return opt_result::failure_at (stmt
,
14858 " unsupported data-type for gcond %T\n",
/* The scalar type comes from the data reference, the MASK_STORE
   value operand (arg 3), or the statement's lhs, in that order.  */
14861 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
14862 scalar_type
= TREE_TYPE (DR_REF (dr
));
14863 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14864 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
14866 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
14868 if (dump_enabled_p ())
14871 dump_printf_loc (MSG_NOTE
, vect_location
,
14872 "get vectype for scalar type (group size %d):"
14873 " %T\n", group_size
, scalar_type
);
14875 dump_printf_loc (MSG_NOTE
, vect_location
,
14876 "get vectype for scalar type: %T\n", scalar_type
);
14878 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
14880 return opt_result::failure_at (stmt
,
14882 " unsupported data-type %T\n",
14885 if (dump_enabled_p ())
14886 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
/* A scalar type that is itself a vector mode means the input already
   contains vector statements; refuse to vectorize those.  */
14889 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
14890 return opt_result::failure_at (stmt
,
14891 "not vectorized: vector stmt in loop:%G",
14894 *stmt_vectype_out
= vectype
;
14896 /* Don't try to compute scalar types if the stmt produces a boolean
14897 vector; use the existing vector type instead. */
14898 tree nunits_vectype
= vectype
;
14899 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14901 /* The number of units is set according to the smallest scalar
14902 type (or the largest vector size, but we only support one
14903 vector size per vectorization). */
14904 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
14905 TREE_TYPE (vectype
));
14906 if (!types_compatible_p (scalar_type
, TREE_TYPE (vectype
)))
14908 if (dump_enabled_p ())
14909 dump_printf_loc (MSG_NOTE
, vect_location
,
14910 "get vectype for smallest scalar type: %T\n",
14912 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
14914 if (!nunits_vectype
)
14915 return opt_result::failure_at
14916 (stmt
, "not vectorized: unsupported data-type %T\n",
14918 if (dump_enabled_p ())
14919 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
/* The nunits vectype must contain a whole multiple of the subparts
   of the statement vectype.  */
14924 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
14925 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
14926 return opt_result::failure_at (stmt
,
14927 "Not vectorized: Incompatible number "
14928 "of vector subparts between %T and %T\n",
14929 nunits_vectype
, *stmt_vectype_out
);
14931 if (dump_enabled_p ())
14933 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
14934 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
14935 dump_printf (MSG_NOTE
, "\n");
14938 *nunits_vectype_out
= nunits_vectype
;
14939 return opt_result::success ();
14942 /* Generate and return statement sequence that sets vector length LEN that is:
14944 min_of_start_and_end = min (START_INDEX, END_INDEX);
14945 left_len = END_INDEX - min_of_start_and_end;
14946 rhs = min (left_len, LEN_LIMIT);
14949 Note: the cost of the code generated by this function is modeled
14950 by vect_estimate_min_profitable_iters, so changes here may need
14951 corresponding changes there. */
14954 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
14956 gimple_seq stmts
= NULL
;
14957 tree len_type
= TREE_TYPE (len
);
14958 gcc_assert (TREE_TYPE (start_index
) == len_type
);
14960 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
14961 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
14962 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
14963 gimple
* stmt
= gimple_build_assign (len
, rhs
);
14964 gimple_seq_add_stmt (&stmts
, stmt
);