/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  stmt_vec_info stmt_info, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
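
/* Illustrative note (editor's addition, not part of the original sources):
   callers in this file typically accumulate the preliminary estimate while
   also recording a cost entry for the target model, e.g.

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   The entries pushed onto the cost vector are later replayed against the
   target's cost hooks once analysis of the statement has finished.  */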
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, slp_tree node,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
                           vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind,
                  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
              || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
                           NULL_TREE, 0, where);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vec_stmt_relevant_p: forcing live pattern stmt "
                             "relevant.\n");
          relevant = vect_used_only_live;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "mark relevant %d, live %d: %G", relevant, live_p,
                         stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
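
/* Editor's note (assumption based on the code above, not original text):
   the "relevant > STMT_VINFO_RELEVANT" comparison relies on the ordering of
   enum vect_relevant in tree-vectorizer.h, where stronger forms of relevance
   have larger values, so marking a statement only ever upgrades it.  */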
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
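
/* Editor's example (not from the original sources): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;   <-- relevant: it has a vdef (stores to memory)
         sum += c[i];       <-- live: the def of sum is used after the loop
       }

   the store is marked vect_used_in_scope because it alters memory, while
   the reduction update is detected through its use in the loop-closed exit
   PHI and is therefore considered live.  */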
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
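
/* Editor's example (not from the original sources): for the store
   "a[i_1] = x_2" the use of i_1 only feeds the array index and so does not
   by itself make its defining statement relevant, whereas x_2 is the stored
   value and does.  */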
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}
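
/* Editor's note (summary of the pass above, not original text): relevance
   and liveness are propagated backwards from the initially relevant
   statements through their operands via the worklist; a statement is only
   re-queued when its marking actually changes, so the iteration terminates
   once the relevant/live sets reach a fixed point.  */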
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        def = SSA_NAME_DEF_STMT (gimple_vuse (def));
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       gather_scatter_info *gs_info,
                       dr_alignment_support alignment_support_scheme,
                       int misalignment,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl)
        /* For emulated scatter N offset vector element extracts
           (we assume the scalar scaling and ptr + offset add is consumed by
           the load).  */
        inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
                                         vec_to_scalar, stmt_info, 0,
                                         vect_body);
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
                         misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl))
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ???  Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
                   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
                   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
                   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, vec_oprnds0, NULL_TREE,
                     op1, vec_oprnds1, NULL_TREE,
                     op2, vec_oprnds2, NULL_TREE,
                     op3, vec_oprnds3, NULL_TREE);
}
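
/* Editor's sketch (illustrative only, assuming the trailing operands have
   the usual NULL defaults in the tree-vectorizer.h declaration): a typical
   vectorizable_* routine collects its vectorized operands with

     auto_vec<tree> vec_oprnds0, vec_oprnds1;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1);

   and then emits one vector statement per entry of the returned vectors.  */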
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && (!(gimple_call_flags (vec_stmt)
                            & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
                          || (gimple_call_lhs (vec_stmt)
                              && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
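
/* Editor's example (not from the original sources): a call to the libm
   function sqrt maps to the combined function CFN_SQRT; if the target can
   perform sqrt directly on the chosen vector mode, the routine above
   returns IFN_SQRT and the scalar call can be replaced by that internal
   function operating on whole vectors.  */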
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      slp_tree slp_node,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                      memory_access_type,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      internal_fn ifn
        = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
                   : vect_store_lanes_supported (vectype, group_size, true));
      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
                             ? IFN_MASK_LEN_GATHER_LOAD
                             : IFN_MASK_LEN_SCATTER_STORE);
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
                                                  gs_info->memory_type,
                                                  gs_info->offset_vectype,
                                                  gs_info->scale))
        vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                       gs_info->memory_type,
                                                       gs_info->offset_vectype,
                                                       gs_info->scale))
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
           && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because the"
                         " target doesn't have the appropriate partial"
                         " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
                  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
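
/* Editor's example (not from the original sources): with a loop mask
   loop_mask_5 and a vectorized condition vec_mask_4, the helper above
   emits

     vec_mask_and_6 = vec_mask_4 & loop_mask_5;

   before *GSI and returns vec_mask_and_6; if the pair is already recorded
   in vec_cond_masked_set, the original vec_mask is returned unchanged.  */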
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
        continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
                                                         sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
         no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype)
          || gs_info->ifn == IFN_LAST)
        continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "truncating gather/scatter offset to %d bits"
                     " might change its value.\n", element_bits);

  return false;
}
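
/* Editor's worked example (assumed figures, not from the original sources):
   with DR_STEP == 4, at most 255 latch iterations and scale == 4, the
   per-iteration factor is 1, so the offsets span [0, 255] and fit in an
   8-bit unsigned offset type; with scale == 1 the factor is 4, the span is
   [0, 1020] and the narrowest viable offset type is 16 bits wide.  */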
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   load or store.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
              >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
			     indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
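/* For instance, for a four-element vector the stepped encoding built in
   perm_mask_for_reverse expands to the full reversal selector
   { 3, 2, 1, 0 }.  */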
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */

static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
			      stmt_vec_info stmt_info, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies, poly_int64 *poffset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
	      * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
  alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
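/* Illustration of the *POFFSET adjustment above: for a V4SI access with a
   negative step, the first vector access starts (4 - 1) * 4 == 12 bytes
   before the DR's address, so *POFFSET is set to -12.  */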
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the return vector.  It first checks
   whether the target supports constructing the result from NELTS vector
   pieces; if not, it checks whether an integer mode of the piece size can
   be used instead.  It returns NULL_TREE if no usable composition is found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  /* When we are asked to compose the vector from its components let
     that happen directly.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
    {
      *ptype = TREE_TYPE (vtype);
      return vtype;
    }

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if exists an integer type of the same piece size and
	 if vec_init optab supports construction from it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
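/* get_group_load_store_type below uses this helper with NELTS == 2 to ask
   whether a vector can be built from two half-width pieces, which lets a
   load whose gap covers the upper half avoid peeling for gaps.  */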
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
			   tree vectype, slp_tree slp_node,
			   bool masked_p, vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type,
			   poly_int64 *poffset,
			   dr_alignment_support *alignment_support_scheme,
			   int *misalignment,
			   gather_scatter_info *gs_info,
			   internal_fn *lanes_ifn)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  stmt_vec_info first_stmt_info;
  unsigned int group_size;
  unsigned HOST_WIDE_INT gap;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      gap = DR_GROUP_GAP (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      group_size = 1;
      gap = 0;
    }
  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  bool single_element_p = (stmt_info == first_stmt_info
			   && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
			&& vls_type == VLS_LOAD
			&& loop_vinfo
			&& !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);

  if (slp_node)
    {
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
      if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	first_dr_info
	  = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
      if (STMT_VINFO_STRIDED_P (first_stmt_info))
	{
	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (multiple_p (nunits, group_size))
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr_info,
						       vectype)
			/ vect_get_scalar_dr_size (first_dr_info)))
	    overrun_p = false;

	  /* If the gap splits the vector in half and the target
	     can do half-vector operations avoid the epilogue peeling
	     by simply loading half of the vector only.  Usually
	     the construction with an upper zero half will be elided.  */
	  dr_alignment_support alss;
	  int misalign = dr_misalignment (first_dr_info, vectype);
	  tree half_vtype;
	  if (overrun_p
	      && !masked_p
	      && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
							  vectype, misalign)))
		   == dr_aligned
		  || alss == dr_unaligned_supported)
	      && known_eq (nunits, (group_size - gap) * 2)
	      && known_eq (nunits, group_size)
	      && (vector_vector_composition_type (vectype, 2, &half_vtype)
		  != NULL_TREE))
	    overrun_p = false;

	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  int cmp = compare_step_with_zero (vinfo, stmt_info);
	  if (cmp < 0)
	    {
	      if (single_element_p)
		/* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
		   only correct for single element "interleaving" SLP.  */
		*memory_access_type = get_negative_load_store_type
					(vinfo, stmt_info, vectype, vls_type,
					 1, poffset);
	      else
		{
		  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
		     separated by the stride, until we have a complete vector.
		     Fall back to scalar accesses if that isn't possible.  */
		  if (multiple_p (nunits, group_size))
		    *memory_access_type = VMAT_STRIDED_SLP;
		  else
		    *memory_access_type = VMAT_ELEMENTWISE;
		}
	    }
	  else if (cmp == 0 && loop_vinfo)
	    {
	      gcc_assert (vls_type == VLS_LOAD);
	      *memory_access_type = VMAT_INVARIANT;
	      /* Invariant accesses perform only component accesses, alignment
		 is irrelevant for them.  */
	      *alignment_support_scheme = dr_unaligned_supported;
	    }
	  else
	    *memory_access_type = VMAT_CONTIGUOUS;

	  /* When we have a contiguous access across loop iterations
	     but the access in the loop doesn't cover the full vector
	     we can end up with no gap recorded but still excess
	     elements accessed, see PR103116.  Make sure we peel for
	     gaps if necessary and sufficient and give up if not.

	     If there is a combination of the access not covering the full
	     vector and a gap recorded then we may need to peel twice.  */
	  if (loop_vinfo
	      && *memory_access_type == VMAT_CONTIGUOUS
	      && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
	      && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
			      nunits))
	    {
	      unsigned HOST_WIDE_INT cnunits, cvf;
	      if (!can_overrun_p
		  || !nunits.is_constant (&cnunits)
		  || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
		  /* Peeling for gaps assumes that a single scalar iteration
		     is enough to make sure the last vector iteration doesn't
		     access excess elements.
		     ???  Enhancements include peeling multiple iterations
		     or using masked loads with a static mask.  */
		  || (group_size * cvf) % cnunits + group_size - gap < cnunits)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "peeling for gaps insufficient for "
				     "access\n");
		  return false;
		}
	      overrun_p = true;
	    }
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && !masked_p
	  && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
		    / vect_get_scalar_dr_size (first_dr_info)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (first_stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (vinfo, stmt_info) > 0)
	{
	  /* First cope with the degenerate case of a single-element
	     vector.  */
	  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
	    ;
	  else
	    {
	      /* Otherwise try using LOAD/STORE_LANES.  */
	      *lanes_ifn
		= vls_type == VLS_LOAD
		    ? vect_load_lanes_supported (vectype, group_size, masked_p)
		    : vect_store_lanes_supported (vectype, group_size,
						  masked_p);
	      if (*lanes_ifn != IFN_LAST)
		{
		  *memory_access_type = VMAT_LOAD_STORE_LANES;
		  overrun_p = would_overrun_p;
		}
	      /* If that fails, try using permuting loads.  */
	      else if (vls_type == VLS_LOAD
		       ? vect_grouped_load_supported (vectype,
						      single_element_p,
						      group_size)
		       : vect_grouped_store_supported (vectype, group_size))
		{
		  *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
		  overrun_p = would_overrun_p;
		}
	    }
	}
    }

  /* As a last resort, try using a gather load or scatter store.

     ???  Although the code can handle all group sizes correctly,
     it probably isn't a win to use separate strided accesses based
     on nearby locations.  Or, even if it's a win over scalar code,
     it might not be a win over vectorizing at a lower VF, if that
     allows us to use contiguous accesses.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && single_element_p
      && loop_vinfo
      && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
					     masked_p, gs_info))
    *memory_access_type = VMAT_GATHER_SCATTER;

  if (*memory_access_type == VMAT_GATHER_SCATTER
      || *memory_access_type == VMAT_ELEMENTWISE)
    {
      *alignment_support_scheme = dr_unaligned_supported;
      *misalignment = DR_MISALIGNMENT_UNKNOWN;
    }
  else
    {
      *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
      *alignment_support_scheme
	= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
					 *misalignment);
    }

  if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt_info)
	{
	  tree op = vect_get_store_rhs (next_stmt_info);
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
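/* A concrete reading of the "overrun is fine" tests above: if the first
   access is known to be 16-byte aligned and the scalar elements are 4 bytes,
   a trailing gap of up to 3 elements never makes a vector access cross into
   a new 16-byte block, so no epilogue peeling is needed for it.  */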
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
		     tree vectype, slp_tree slp_node,
		     bool masked_p, vec_load_store_type vls_type,
		     unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     poly_int64 *poffset,
		     dr_alignment_support *alignment_support_scheme,
		     int *misalignment,
		     gather_scatter_info *gs_info,
		     internal_fn *lanes_ifn)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  *misalignment = DR_MISALIGNMENT_UNKNOWN;
  *poffset = 0;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
	gcc_unreachable ();
      /* When using internal functions, we rely on pattern recognition
	 to convert the type of the offset to the type that the target
	 requires, with the result being a call to an internal function.
	 If that failed for some reason (e.g. because another pattern
	 took priority), just handle cases in which the offset already
	 has the right type.  */
      else if (gs_info->ifn != IFN_LAST
	       && !is_gimple_call (stmt_info->stmt)
	       && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
					  TREE_TYPE (gs_info->offset_vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s offset requires a conversion\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
      else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
	{
	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
	      || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
	      || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
					 (gs_info->offset_vectype),
				       TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported vector types for emulated "
				 "gather.\n");
	      return false;
	    }
	}
      /* Gather-scatter accesses perform only component accesses, alignment
	 is irrelevant for them.  */
      *alignment_support_scheme = dr_unaligned_supported;
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
    {
      if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
				      masked_p,
				      vls_type, memory_access_type, poffset,
				      alignment_support_scheme,
				      misalignment, gs_info, lanes_ifn))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp_node);
      if (loop_vinfo
	  && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
						 masked_p, gs_info))
	*memory_access_type = VMAT_GATHER_SCATTER;
      else
	*memory_access_type = VMAT_ELEMENTWISE;
      /* Alignment is irrelevant here.  */
      *alignment_support_scheme = dr_unaligned_supported;
    }
  else
    {
      int cmp = compare_step_with_zero (vinfo, stmt_info);
      if (cmp == 0)
	{
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	  /* Invariant accesses perform only component accesses, alignment
	     is irrelevant for them.  */
	  *alignment_support_scheme = dr_unaligned_supported;
	}
      else
	{
	  if (cmp < 0)
	    *memory_access_type = get_negative_load_store_type
	      (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
	  else
	    *memory_access_type = VMAT_CONTIGUOUS;
	  *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
					   vectype, *poffset);
	  *alignment_support_scheme
	    = vect_supportable_dr_alignment (vinfo,
					     STMT_VINFO_DR_INFO (stmt_info),
					     vectype, *misalignment);
	}
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Not using elementwise accesses due to variable "
			 "vectorization factor.\n");
      return false;
    }

  if (*alignment_support_scheme == dr_unaligned_unsupported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported unaligned access\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  if (!first_stmt_info)
    first_stmt_info = stmt_info;
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (first_stmt_info)
      && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
	   && !DR_GROUP_NEXT_ELEMENT (stmt_info)
	   && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
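/* To summarise the non-grouped classification above: a zero step becomes
   VMAT_INVARIANT (loads only), a negative step is handed to
   get_negative_load_store_type, and any other constant step is treated as
   VMAT_CONTIGUOUS with the usual misalignment checks.  */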
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */

static bool
vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
			slp_tree slp_node, unsigned mask_index,
			tree *mask, slp_tree *mask_node,
			vect_def_type *mask_dt_out, tree *mask_vectype_out)
{
  enum vect_def_type mask_dt;
  tree mask_vectype;
  slp_tree mask_node_1;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
			   mask, &mask_node_1, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask use not simple.\n");
      return false;
    }

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mask argument is not a boolean.\n");
      return false;
    }

  /* If the caller is not prepared for adjusting an external/constant
     SLP mask vector type fail.  */
  if (slp_node
      && !mask_node
      && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "SLP mask argument is not vectorized.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
		TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "vector mask type %T"
			 " does not match vector data type %T.\n",
			 mask_vectype, vectype);
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  if (mask_node)
    *mask_node = mask_node_1;
  return true;
}
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
		      slp_tree slp_node, tree rhs,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  int op_no = 0;
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      if (gimple_call_internal_p (call)
	  && internal_store_fn_p (gimple_call_internal_fn (call)))
	op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
      if (slp_node)
	op_no = vect_slp_child_index_for_operand (call, op_no);
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  slp_tree slp_op;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
			   &rhs, &slp_op, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (vec_info *vinfo,
			  stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (vec_info *vinfo,
				stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */

static void
vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt,
			      gather_scatter_info *gs_info,
			      tree mask,
			      stmt_vector_for_cost *cost_vec)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  /* FIXME: Keep the previous costing way in vect_model_load_cost by costing
     N scalar loads, but it should be tweaked to use target specific costs
     on related gather load calls.  */
  if (cost_vec)
    {
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      unsigned int inside_cost;
      inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
				      scalar_load, stmt_info, 0, vect_body);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: inside_cost = %d, "
			 "prologue_cost = 0 .\n",
			 inside_cost);
      return;
    }

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  tree real_masktype = masktype;
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask
			   || TREE_CODE (masktype) == INTEGER_TYPE
			   || types_compatible_p (srctype, masktype)));
  if (mask)
    masktype = truth_type_for (srctype);

  tree mask_halftype = masktype;
  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask && VECTOR_TYPE_P (real_masktype))
	{
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
      else if (mask)
	mask_halftype = truth_type_for (gs_info->offset_vectype);
    }
  else
    gcc_unreachable ();

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
      mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
    }

  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_masks;
  vect_get_vec_defs_for_operand (vinfo, stmt_info,
				 modifier == WIDEN ? ncopies / 2 : ncopies,
				 gs_info->offset, &vec_oprnds0);
  if (mask)
    vect_get_vec_defs_for_operand (vinfo, stmt_info,
				   modifier == NARROW ? ncopies / 2 : ncopies,
				   mask, &vec_masks, masktype);
  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt_info, gsi);
      else
	op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
					    mask_perm_mask, stmt_info, gsi);
	  else
	    {
	      if (modifier == NARROW)
		{
		  if ((j & 1) == 0)
		    vec_mask = vec_masks[j / 2];
		}
	      else
		vec_mask = vec_masks[j];

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
		  poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
		  gcc_assert (known_eq (sub1, sub2));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  gassign *new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
	    {
	      var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
	      gassign *new_stmt
		= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
						    : VEC_UNPACK_LO_EXPR,
				       mask_op);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_op = var;
	    }
	  src_op = mask_op;
	}

      tree mask_arg = mask_op;
      if (masktype != real_masktype)
	{
	  tree utype, optype = TREE_TYPE (mask_op);
	  if (VECTOR_TYPE_P (real_masktype)
	      || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
	    utype = real_masktype;
	  else
	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
	  var = vect_get_new_ssa_name (utype, vect_scalar_var);
	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
	  gassign *new_stmt
	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  mask_arg = var;
	  if (!useless_type_conversion_p (real_masktype, utype))
	    {
	      gcc_assert (TYPE_PRECISION (utype)
			  <= TYPE_PRECISION (real_masktype));
	      var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_arg = var;
	    }
	  src_op = build_zero_cst (srctype);
	}
      gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
					    mask_arg, scale);

      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_stmt, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, var);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
				      stmt_info, gsi);
	  new_stmt = SSA_NAME_DEF_STMT (var);
	}

      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
/* Build a scatter store call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the scatter store operation.  If the store is conditional, MASK is the
   unvectorized condition, otherwise MASK is null.  */

static void
vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
				gimple_stmt_iterator *gsi, gimple **vec_stmt,
				gather_scatter_info *gs_info, tree mask)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 scatter_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
  if (known_eq (nunits, scatter_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, scatter_off_nunits))
    {
      modifier = WIDEN;

      /* Currently gathers and scatters are only supported for
	 fixed-length vectors.  */
      unsigned int count = scatter_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (unsigned i = 0; i < (unsigned int) count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
      gcc_assert (perm_mask != NULL_TREE);
    }
  else if (known_eq (nunits, scatter_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently gathers and scatters are only supported for
	 fixed-length vectors.  */
      unsigned int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (unsigned i = 0; i < (unsigned int) count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);
      gcc_assert (perm_mask != NULL_TREE);
      ncopies *= 2;

      if (mask)
	mask_halfvectype = truth_type_for (gs_info->offset_vectype);
    }
  else
    gcc_unreachable ();

  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);

  gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
		       && TREE_CODE (rettype) == VOID_TYPE);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      edge pe = loop_preheader_edge (loop);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree mask_arg = NULL_TREE;
  if (mask == NULL_TREE)
    {
      mask_arg = build_int_cst (masktype, -1);
      mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vec_masks;
  if (mask)
    {
      tree mask_vectype = truth_type_for (vectype);
      vect_get_vec_defs_for_operand (vinfo, stmt_info,
				     modifier == NARROW ? ncopies / 2 : ncopies,
				     mask, &vec_masks, mask_vectype);
    }
  vect_get_vec_defs_for_operand (vinfo, stmt_info,
				 modifier == WIDEN ? ncopies / 2 : ncopies,
				 gs_info->offset, &vec_oprnds0);
  tree op = vect_get_store_rhs (stmt_info);
  vect_get_vec_defs_for_operand (vinfo, stmt_info,
				 modifier == NARROW ? ncopies / 2 : ncopies, op,
				 &vec_oprnds1);

  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree src, vec_mask;
  for (int j = 0; j < ncopies; ++j)
    {
      if (modifier == WIDEN)
	{
	  if (j & 1)
	    op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
				       stmt_info, gsi);
	  else
	    op = vec_oprnd0 = vec_oprnds0[j / 2];
	  src = vec_oprnd1 = vec_oprnds1[j];
	  if (mask)
	    mask_op = vec_mask = vec_masks[j];
	}
      else if (modifier == NARROW)
	{
	  if (j & 1)
	    src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
					perm_mask, stmt_info, gsi);
	  else
	    src = vec_oprnd1 = vec_oprnds1[j / 2];
	  op = vec_oprnd0 = vec_oprnds0[j];
	  if (mask)
	    mask_op = vec_mask = vec_masks[j / 2];
	}
      else
	{
	  op = vec_oprnd0 = vec_oprnds0[j];
	  src = vec_oprnd1 = vec_oprnds1[j];
	  if (mask)
	    mask_op = vec_mask = vec_masks[j];
	}

      if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
				TYPE_VECTOR_SUBPARTS (srctype)));
	  tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
	  src = build1 (VIEW_CONVERT_EXPR, srctype, src);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  src = var;
	}

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  tree utype;
	  mask_arg = mask_op;
	  if (modifier == NARROW)
	    {
	      tree var
		= vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
	      gassign *new_stmt
		= gimple_build_assign (var,
				       (j & 1) ? VEC_UNPACK_HI_EXPR
					       : VEC_UNPACK_LO_EXPR,
				       mask_op);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_arg = var;
	    }
	  tree optype = TREE_TYPE (mask_arg);
	  if (TYPE_MODE (masktype) == TYPE_MODE (optype))
	    utype = masktype;
	  else
	    utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
	  tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
	  mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
	  gassign *new_stmt
	    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  mask_arg = var;
	  if (!useless_type_conversion_p (masktype, utype))
	    {
	      gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
	      tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
	      new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_arg = var;
	    }
	}

      gcall *new_stmt
	= gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
}
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
			     class loop *loop, stmt_vec_info stmt_info,
			     slp_tree slp_node, gather_scatter_info *gs_info,
			     tree *dataref_ptr, vec<tree> *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  if (slp_node)
    vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
  else
    {
      unsigned ncopies
	= vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
      vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
				     gs_info->offset, vec_offset,
				     gs_info->offset_vectype);
    }
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gimple_stmt_iterator *gsi,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset,
				 vec_loop_lens *loop_lens)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    {
      /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
	 ivtmp_8 = _31 * 16 (step in bytes);
	 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
	 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
      tree loop_len
	= vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
      tree tmp
	= fold_build2 (MULT_EXPR, sizetype,
		       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		       loop_len);
      *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
						GSI_SAME_STMT);
    }
  else
    {
      tree bump
	= size_binop (MULT_EXPR,
		      fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
		      size_int (TYPE_VECTOR_SUBPARTS (vectype)));
      *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
    }

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
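/* For example, with DR_STEP == 8 and a gather/scatter scale of 4, X above is
   2 and *VEC_OFFSET becomes the series { 0, 2, 4, 6, ... }, while
   *DATAREF_BUMP advances the base by 8 * TYPE_VECTOR_SUBPARTS (vectype)
   bytes per copy (or by the .SELECT_VL result when that is in use).  */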
/* Prepare the pointer IVs which need to be updated by a variable amount.
   Such a variable amount is the outcome of .SELECT_VL.  In this case we can
   allow each iteration to process a flexible number of elements, as long as
   that number is <= VF.

   Return the pointer increment according to .SELECT_VL.
   If new statements are needed, insert them before GSI.  */

static tree
vect_get_loop_variant_data_ptr_increment (
  vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
  vec_loop_lens *loop_lens, dr_vec_info *dr_info,
  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;

  /* gather/scatter never reach here.  */
  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);

  /* When the loop uses the SELECT_VL pattern, we dynamically adjust
     the memory address by the .SELECT_VL result.

     The result of .SELECT_VL is the number of elements to be processed
     in each iteration, so the memory address adjustment is:

       addr = addr + .SELECT_VL (ARG..) * step;  */
  tree loop_len
    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
  tree len_type = TREE_TYPE (loop_len);
  /* Since the outcome of .SELECT_VL is an element count, scale it by the
     step in bytes so that it can be used to adjust the pointer IVs by a
     variable amount.  */
  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
			  wide_int_to_tree (len_type, wi::to_widest (step)));
  tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
  gassign *assign = gimple_build_assign (bump, tmp);
  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
  return bump;
}
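/* Illustrative numbers for the bump computed above: if the data reference
   steps by 4 bytes per element and .SELECT_VL returns 5 for an iteration,
   the pointer IV is advanced by 5 * 4 == 20 bytes.  */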
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type,
			     vec_loop_lens *loop_lens = nullptr)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
    return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
						     loop_lens, dr_info,
						     memory_access_type);

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
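/* E.g. for AGGR_TYPE V4SI the increment is TYPE_SIZE_UNIT == 16 bytes,
   negated when the data reference runs backwards.  */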
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */

static bool
vectorizable_bswap (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    slp_tree *slp_op,
		    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  unsigned ncopies;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
  unsigned word_bytes;
  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
    return false;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  machine_mode vmode = TYPE_MODE (char_vectype);
  if (!can_vec_perm_const_p (vmode, vmode, indices))
    return false;

  if (! vec_stmt)
    {
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      record_stmt_cost (cost_vec,
			1, vector_stmt, stmt_info, 0, vect_prologue);
      record_stmt_cost (cost_vec,
			slp_node
			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
			vec_perm, stmt_info, 0, vect_body);
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op, &vec_oprnds);
  /* Arguments are ready.  Create the new vector stmt.  */
  unsigned i;
  tree vop;
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    {
      gimple *new_stmt;
      tree tem = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   char_vectype, vop));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tree tem2 = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
				      tem, tem, bswap_vconst);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tem = make_ssa_name (vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   vectype, tem2));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds.release ();
  return true;
}
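/* For BUILT_IN_BSWAP32 on a vector of 4-byte words the selector built above
   starts { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. it reverses the
   bytes within each word of the view-converted char vector.  */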
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  code_helper *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  code_helper code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
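/* For instance, narrowing V2DI call results to a V4SI result is a single
   step, and the pack code stored in *CONVERT_CODE is typically
   VEC_PACK_TRUNC_EXPR; a DI->QI narrowing would need several intermediate
   steps and is rejected.  */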
3498 /* Function vectorizable_call.
3500 Check if STMT_INFO performs a function call that can be vectorized.
3501 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3502 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3503 Return true if STMT_INFO is vectorizable in this way. */
3506 vectorizable_call (vec_info
*vinfo
,
3507 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3508 gimple
**vec_stmt
, slp_tree slp_node
,
3509 stmt_vector_for_cost
*cost_vec
)
3515 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3516 tree vectype_out
, vectype_in
;
3517 poly_uint64 nunits_in
;
3518 poly_uint64 nunits_out
;
3519 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3520 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3521 tree fndecl
, new_temp
, rhs_type
;
3522 enum vect_def_type dt
[4]
3523 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3524 vect_unknown_def_type
};
3525 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3526 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3527 int ndts
= ARRAY_SIZE (dt
);
3529 auto_vec
<tree
, 8> vargs
;
3530 enum { NARROW
, NONE
, WIDEN
} modifier
;
3534 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3537 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3541 /* Is STMT_INFO a vectorizable call? */
3542 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3546 if (gimple_call_internal_p (stmt
)
3547 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3548 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3549 /* Handled by vectorizable_load and vectorizable_store. */
3552 if (gimple_call_lhs (stmt
) == NULL_TREE
3553 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3556 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3558 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3560 /* Process function arguments. */
3561 rhs_type
= NULL_TREE
;
3562 vectype_in
= NULL_TREE
;
3563 nargs
= gimple_call_num_args (stmt
);
3565 /* Bail out if the function has more than four arguments, we do not have
3566 interesting builtin functions to vectorize with more than two arguments
3567 except for fma. No arguments is also not good. */
3568 if (nargs
== 0 || nargs
> 4)
3571 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3572 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3573 if (cfn
== CFN_GOMP_SIMD_LANE
)
3576 rhs_type
= unsigned_type_node
;
3580 if (internal_fn_p (cfn
))
3581 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3583 for (i
= 0; i
< nargs
; i
++)
3585 if ((int) i
== mask_opno
)
3587 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3588 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3593 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3594 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3596 if (dump_enabled_p ())
3597 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3598 "use not simple.\n");
3602 /* We can only handle calls with arguments of the same type. */
3604 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3606 if (dump_enabled_p ())
3607 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3608 "argument types differ.\n");
3612 rhs_type
= TREE_TYPE (op
);
3615 vectype_in
= vectypes
[i
];
3616 else if (vectypes
[i
]
3617 && !types_compatible_p (vectypes
[i
], vectype_in
))
3619 if (dump_enabled_p ())
3620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3621 "argument vector types differ.\n");
3625 /* If all arguments are external or constant defs, infer the vector type
3626 from the scalar type. */
3628 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3630 gcc_assert (vectype_in
);
3633 if (dump_enabled_p ())
3634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3635 "no vectype for scalar type %T\n", rhs_type
);
  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
     by a pack of the two vectors into an SI vector.  We would need
     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
  if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mismatched vector sizes %T and %T\n",
			 vectype_in, vectype_out);
      return false;
    }

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }

  if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use emulated vector type for call\n");
      return false;
    }
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  internal_fn ifn = IFN_LAST;
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  code_helper convert_code = MAX_TREE_CODES;
  if (cfn != CFN_LAST
      && (modifier == NONE
	  || (modifier == NARROW
	      && simple_integer_narrowing (vectype_out, vectype_in,
					   &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
	fndecl = targetm.vectorize.builtin_vectorized_function
	  (cfn, vectype_out, vectype_in);
      else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
	fndecl = targetm.vectorize.builtin_md_vectorized_function
	  (callee, vectype_out, vectype_in);
    }
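  /* For example (a sketch; actual support depends on the target): a call to
     sqrtf in the loop below can be vectorized either through the internal
     function .SQRT on V4SF, if the target's sqrt optab handles that mode, or
     through a target-specific builtin returned by
     targetm.vectorize.builtin_vectorized_function:

       void f (float *restrict a, float *restrict b, int n)
       {
	 for (int i = 0; i < n; i++)
	   a[i] = __builtin_sqrtf (b[i]);
       }
  */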
  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else if (modifier == NONE
	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
	return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
				   slp_op, vectype_in, cost_vec);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }
  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  internal_fn cond_fn = get_conditional_internal_fn (ifn);
  internal_fn cond_len_fn = get_len_internal_fn (ifn);
  int len_opno = internal_fn_len_index (cond_len_fn);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
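  /* A sketch of the two partial-vector styles this sets up (assuming the
     target provides the corresponding optabs): a fully-masked loop rewrites
     .FMA (a, b, c) as .COND_FMA (loop_mask, a, b, c, else_value), while a
     length-controlled loop uses .COND_LEN_FMA (mask, a, b, c, else_value,
     len, bias), where LEN and BIAS are the two extra trailing operands.  */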
  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node)
	for (i = 0; i < nargs; ++i)
	  if (!vect_maybe_update_slp_op_vectype (slp_op[i],
						 vectypes[i]
						 ? vectypes[i] : vectype_in))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "incompatible vector types for invariants\n");
	      return false;
	    }
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	record_stmt_cost (cost_vec, ncopies / 2,
			  vec_promote_demote, stmt_info, 0, vect_body);

      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && (reduc_idx >= 0 || mask_opno >= 0))
	{
	  if (reduc_idx >= 0
	      && (cond_fn == IFN_LAST
		  || !direct_internal_fn_supported_p (cond_fn, vectype_out,
						      OPTIMIZE_FOR_SPEED))
	      && (cond_len_fn == IFN_LAST
		  || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
						      OPTIMIZE_FOR_SPEED)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else
	    {
	      unsigned int nvectors
		= (slp_node
		   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
		   : ncopies);
	      tree scalar_mask = NULL_TREE;
	      if (mask_opno >= 0)
		scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
	      if (cond_len_fn != IFN_LAST
		  && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
						     OPTIMIZE_FOR_SPEED))
		vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
				      1);
	      else
		vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
				       scalar_mask);
	    }
	}
      return true;
    }
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
  bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
  unsigned int vect_nargs = nargs;
  /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS.  */
3848 else if (reduc_idx
>= 0)
3851 else if (masked_loop_p
&& reduc_idx
>= 0)
3857 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3859 tree prev_res
= NULL_TREE
;
3860 vargs
.safe_grow (vect_nargs
, true);
3861 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3862 for (j
= 0; j
< ncopies
; ++j
)
3864 /* Build argument list for the vectorized call. */
3867 vec
<tree
> vec_oprnds0
;
3869 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3870 vec_oprnds0
= vec_defs
[0];
3872 /* Arguments are ready. Create the new vector stmt. */
3873 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3876 if (masked_loop_p
&& reduc_idx
>= 0)
3878 unsigned int vec_num
= vec_oprnds0
.length ();
3879 /* Always true for SLP. */
3880 gcc_assert (ncopies
== 1);
3881 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3882 gsi
, masks
, vec_num
,
3886 for (k
= 0; k
< nargs
; k
++)
3888 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3889 vargs
[varg
++] = vec_oprndsk
[i
];
3891 if (masked_loop_p
&& reduc_idx
>= 0)
3892 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3894 if (modifier
== NARROW
)
3896 /* We don't define any narrowing conditional functions
3898 gcc_assert (mask_opno
< 0);
3899 tree half_res
= make_ssa_name (vectype_in
);
3901 = gimple_build_call_internal_vec (ifn
, vargs
);
3902 gimple_call_set_lhs (call
, half_res
);
3903 gimple_call_set_nothrow (call
, true);
3904 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3907 prev_res
= half_res
;
3910 new_temp
= make_ssa_name (vec_dest
);
3911 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3912 prev_res
, half_res
);
3913 vect_finish_stmt_generation (vinfo
, stmt_info
,
3918 if (len_opno
>= 0 && len_loop_p
)
3920 unsigned int vec_num
= vec_oprnds0
.length ();
3921 /* Always true for SLP. */
3922 gcc_assert (ncopies
== 1);
3924 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3927 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3928 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3929 vargs
[len_opno
] = len
;
3930 vargs
[len_opno
+ 1] = bias
;
3932 else if (mask_opno
>= 0 && masked_loop_p
)
3934 unsigned int vec_num
= vec_oprnds0
.length ();
3935 /* Always true for SLP. */
3936 gcc_assert (ncopies
== 1);
3937 tree mask
= vect_get_loop_mask (loop_vinfo
,
3938 gsi
, masks
, vec_num
,
3940 vargs
[mask_opno
] = prepare_vec_mask
3941 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3942 vargs
[mask_opno
], gsi
);
3946 if (ifn
!= IFN_LAST
)
3947 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3949 call
= gimple_build_call_vec (fndecl
, vargs
);
3950 new_temp
= make_ssa_name (vec_dest
, call
);
3951 gimple_call_set_lhs (call
, new_temp
);
3952 gimple_call_set_nothrow (call
, true);
3953 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3956 slp_node
->push_vec_def (new_stmt
);
3962 if (masked_loop_p
&& reduc_idx
>= 0)
3963 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3965 for (i
= 0; i
< nargs
; i
++)
3967 op
= gimple_call_arg (stmt
, i
);
3970 vec_defs
.quick_push (vNULL
);
3971 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3975 vargs
[varg
++] = vec_defs
[i
][j
];
3977 if (masked_loop_p
&& reduc_idx
>= 0)
3978 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3980 if (len_opno
>= 0 && len_loop_p
)
3982 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3985 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3986 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3987 vargs
[len_opno
] = len
;
3988 vargs
[len_opno
+ 1] = bias
;
3990 else if (mask_opno
>= 0 && masked_loop_p
)
3992 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3995 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3996 vargs
[mask_opno
], gsi
);
4000 if (cfn
== CFN_GOMP_SIMD_LANE
)
4002 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
4004 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
4005 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
4006 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
4007 new_temp
= make_ssa_name (vec_dest
);
4008 new_stmt
= gimple_build_assign (new_temp
, new_var
);
4009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4011 else if (modifier
== NARROW
)
4013 /* We don't define any narrowing conditional functions at
4015 gcc_assert (mask_opno
< 0);
4016 tree half_res
= make_ssa_name (vectype_in
);
4017 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
4018 gimple_call_set_lhs (call
, half_res
);
4019 gimple_call_set_nothrow (call
, true);
4020 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
4023 prev_res
= half_res
;
4026 new_temp
= make_ssa_name (vec_dest
);
4027 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
4029 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4034 if (ifn
!= IFN_LAST
)
4035 call
= gimple_build_call_internal_vec (ifn
, vargs
);
4037 call
= gimple_build_call_vec (fndecl
, vargs
);
4038 new_temp
= make_ssa_name (vec_dest
, call
);
4039 gimple_call_set_lhs (call
, new_temp
);
4040 gimple_call_set_nothrow (call
, true);
4041 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
4045 if (j
== (modifier
== NARROW
? 1 : 0))
4046 *vec_stmt
= new_stmt
;
4047 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4049 for (i
= 0; i
< nargs
; i
++)
4051 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
4052 vec_oprndsi
.release ();
4055 else if (modifier
== NARROW
)
4057 auto_vec
<vec
<tree
> > vec_defs (nargs
);
4058 /* We don't define any narrowing conditional functions at present. */
4059 gcc_assert (mask_opno
< 0);
4060 for (j
= 0; j
< ncopies
; ++j
)
4062 /* Build argument list for the vectorized call. */
4064 vargs
.create (nargs
* 2);
4070 vec
<tree
> vec_oprnds0
;
4072 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
4073 vec_oprnds0
= vec_defs
[0];
4075 /* Arguments are ready. Create the new vector stmt. */
4076 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
4080 for (k
= 0; k
< nargs
; k
++)
4082 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
4083 vargs
.quick_push (vec_oprndsk
[i
]);
4084 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
4087 if (ifn
!= IFN_LAST
)
4088 call
= gimple_build_call_internal_vec (ifn
, vargs
);
4090 call
= gimple_build_call_vec (fndecl
, vargs
);
4091 new_temp
= make_ssa_name (vec_dest
, call
);
4092 gimple_call_set_lhs (call
, new_temp
);
4093 gimple_call_set_nothrow (call
, true);
4094 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
4095 slp_node
->push_vec_def (call
);
4100 for (i
= 0; i
< nargs
; i
++)
4102 op
= gimple_call_arg (stmt
, i
);
4105 vec_defs
.quick_push (vNULL
);
4106 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
4107 op
, &vec_defs
[i
], vectypes
[i
]);
4109 vec_oprnd0
= vec_defs
[i
][2*j
];
4110 vec_oprnd1
= vec_defs
[i
][2*j
+1];
4112 vargs
.quick_push (vec_oprnd0
);
4113 vargs
.quick_push (vec_oprnd1
);
4116 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
4117 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4118 gimple_call_set_lhs (new_stmt
, new_temp
);
4119 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4121 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4125 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
4127 for (i
= 0; i
< nargs
; i
++)
4129 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
4130 vec_oprndsi
.release ();
  else
    /* No current target implements this case.  */
    return false;

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */
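/* For illustration, a sketch of the pointer pattern this recognizes (the SSA
   names are made up): inside a simd loop the address of a[lane] is often
   computed as

     _1 = .GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 4;
     p_3 = &a + _2;

   p_3 is not linear across the whole loop, but within one simd chunk it
   advances by the constant step 4 from lane to lane, which is what gets
   recorded in ARGINFO.  */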
static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
4189 def_stmt
= SSA_NAME_DEF_STMT (v
);
4190 if (is_gimple_assign (def_stmt
))
4191 switch (gimple_assign_rhs_code (def_stmt
))
4194 t
= gimple_assign_rhs2 (def_stmt
);
4195 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
4197 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
4198 v
= gimple_assign_rhs1 (def_stmt
);
4201 t
= gimple_assign_rhs2 (def_stmt
);
4202 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
4204 linear_step
= tree_to_shwi (t
);
4205 v
= gimple_assign_rhs1 (def_stmt
);
4208 t
= gimple_assign_rhs1 (def_stmt
);
4209 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
4210 || (TYPE_PRECISION (TREE_TYPE (v
))
4211 < TYPE_PRECISION (TREE_TYPE (t
))))
4220 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
4222 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
4223 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
4228 arginfo
->linear_step
= linear_step
;
4230 arginfo
->simd_lane_linear
= true;
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
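/* For illustration (a rough sketch; the mangled clone names and the simdlen
   chosen depend on the target's vector function ABI): given

     #pragma omp declare simd notinbranch
     float foo (float x);

     for (int i = 0; i < n; i++)
       a[i] = foo (b[i]);

   the loop body can be vectorized by calling a vector clone such as
   _ZGVbN4v_foo on whole V4SF vectors instead of calling foo per lane.  */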
static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_oprnd0 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;
= gimple_call_fndecl (stmt
);
4284 if (fndecl
== NULL_TREE
4285 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
4287 fndecl
= gimple_call_arg (stmt
, 0);
4288 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
4289 fndecl
= TREE_OPERAND (fndecl
, 0);
4290 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
4293 if (fndecl
== NULL_TREE
)
4296 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4297 if (node
== NULL
|| node
->simd_clones
== NULL
)
4300 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4303 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4307 if (gimple_call_lhs (stmt
)
4308 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4311 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4313 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4315 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4322 /* Process function arguments. */
4323 nargs
= gimple_call_num_args (stmt
) - arg_offset
;
4325 /* Bail out if the function has zero arguments. */
4329 arginfo
.reserve (nargs
, true);
4331 for (i
= 0; i
< nargs
; i
++)
4333 simd_call_arg_info thisarginfo
;
4336 thisarginfo
.linear_step
= 0;
4337 thisarginfo
.align
= 0;
4338 thisarginfo
.op
= NULL_TREE
;
4339 thisarginfo
.simd_lane_linear
= false;
4341 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4342 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4343 &thisarginfo
.vectype
)
4344 || thisarginfo
.dt
== vect_uninitialized_def
)
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4348 "use not simple.\n");
4352 if (thisarginfo
.dt
== vect_constant_def
4353 || thisarginfo
.dt
== vect_external_def
)
4354 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4356 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4358 /* For linear arguments, the analyze phase should have saved
4359 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4360 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4361 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4363 gcc_assert (vec_stmt
);
4364 thisarginfo
.linear_step
4365 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4367 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4368 thisarginfo
.simd_lane_linear
4369 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4370 == boolean_true_node
);
4371 /* If loop has been peeled for alignment, we need to adjust it. */
4372 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4373 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4374 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4376 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4377 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4378 tree opt
= TREE_TYPE (thisarginfo
.op
);
4379 bias
= fold_convert (TREE_TYPE (step
), bias
);
4380 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4382 = fold_build2 (POINTER_TYPE_P (opt
)
4383 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4384 thisarginfo
.op
, bias
);
4388 && thisarginfo
.dt
!= vect_constant_def
4389 && thisarginfo
.dt
!= vect_external_def
4391 && TREE_CODE (op
) == SSA_NAME
4392 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4394 && tree_fits_shwi_p (iv
.step
))
4396 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4397 thisarginfo
.op
= iv
.base
;
4399 else if ((thisarginfo
.dt
== vect_constant_def
4400 || thisarginfo
.dt
== vect_external_def
)
4401 && POINTER_TYPE_P (TREE_TYPE (op
)))
4402 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4403 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4405 if (POINTER_TYPE_P (TREE_TYPE (op
))
4406 && !thisarginfo
.linear_step
4408 && thisarginfo
.dt
!= vect_constant_def
4409 && thisarginfo
.dt
!= vect_external_def
4412 && TREE_CODE (op
) == SSA_NAME
)
4413 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4415 arginfo
.quick_push (thisarginfo
);
4418 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4419 if (!vf
.is_constant ())
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4423 "not considering SIMD clones; not yet supported"
4424 " for variable-width vectors.\n");
4428 unsigned int badness
= 0;
4429 struct cgraph_node
*bestn
= NULL
;
4430 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4431 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4433 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4434 n
= n
->simdclone
->next_clone
)
4436 unsigned int this_badness
= 0;
4437 unsigned int num_calls
;
4438 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4439 || n
->simdclone
->nargs
!= nargs
)
4442 this_badness
+= exact_log2 (num_calls
) * 4096;
4443 if (n
->simdclone
->inbranch
)
4444 this_badness
+= 8192;
4445 int target_badness
= targetm
.simd_clone
.usable (n
);
4446 if (target_badness
< 0)
4448 this_badness
+= target_badness
* 512;
4449 for (i
= 0; i
< nargs
; i
++)
4451 switch (n
->simdclone
->args
[i
].arg_type
)
4453 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4454 if (!useless_type_conversion_p
4455 (n
->simdclone
->args
[i
].orig_type
,
4456 TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
))))
4458 else if (arginfo
[i
].dt
== vect_constant_def
4459 || arginfo
[i
].dt
== vect_external_def
4460 || arginfo
[i
].linear_step
)
4463 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4464 if (arginfo
[i
].dt
!= vect_constant_def
4465 && arginfo
[i
].dt
!= vect_external_def
)
4468 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4469 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4470 if (arginfo
[i
].dt
== vect_constant_def
4471 || arginfo
[i
].dt
== vect_external_def
4472 || (arginfo
[i
].linear_step
4473 != n
->simdclone
->args
[i
].linear_step
))
4476 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4477 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4478 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4479 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4480 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4481 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4485 case SIMD_CLONE_ARG_TYPE_MASK
:
4488 if (i
== (size_t) -1)
4490 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4495 if (arginfo
[i
].align
)
4496 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4497 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4499 if (i
== (size_t) -1)
4501 if (bestn
== NULL
|| this_badness
< badness
)
4504 badness
= this_badness
;
4511 for (i
= 0; i
< nargs
; i
++)
4513 if ((arginfo
[i
].dt
== vect_constant_def
4514 || arginfo
[i
].dt
== vect_external_def
)
4515 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4517 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
));
4518 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4520 if (arginfo
[i
].vectype
== NULL
4521 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4522 simd_clone_subparts (arginfo
[i
].vectype
)))
4526 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4527 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4529 if (dump_enabled_p ())
4530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4531 "vector mask arguments are not supported.\n");
4535 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4536 && bestn
->simdclone
->mask_mode
== VOIDmode
4537 && (simd_clone_subparts (bestn
->simdclone
->args
[i
].vector_type
)
4538 != simd_clone_subparts (arginfo
[i
].vectype
)))
4540 /* FORNOW we only have partial support for vector-type masks that
4541 can't hold all of simdlen. */
4542 if (dump_enabled_p ())
4543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4545 "in-branch vector clones are not yet"
4546 " supported for mismatched vector sizes.\n");
4549 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4550 && bestn
->simdclone
->mask_mode
!= VOIDmode
)
4552 /* FORNOW don't support integer-type masks. */
4553 if (dump_enabled_p ())
4554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4556 "in-branch vector clones are not yet"
4557 " supported for integer mask modes.\n");
4562 fndecl
= bestn
->decl
;
4563 nunits
= bestn
->simdclone
->simdlen
;
4564 ncopies
= vector_unroll_factor (vf
, nunits
);
4566 /* If the function isn't const, only allow it in simd loops where user
4567 has asserted that at least nunits consecutive iterations can be
4568 performed using SIMD instructions. */
4569 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4570 && gimple_vuse (stmt
))
4573 /* Sanity check: make sure that at least one copy of the vectorized stmt
4574 needs to be generated. */
4575 gcc_assert (ncopies
>= 1);
4577 if (!vec_stmt
) /* transformation not required. */
4579 /* When the original call is pure or const but the SIMD ABI dictates
4580 an aggregate return we will have to use a virtual definition and
4581 in a loop eventually even need to add a virtual PHI. That's
4582 not straight-forward so allow to fix this up via renaming. */
4583 if (gimple_call_lhs (stmt
)
4584 && !gimple_vdef (stmt
)
4585 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4586 vinfo
->any_known_not_updated_vssa
= true;
4587 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4588 for (i
= 0; i
< nargs
; i
++)
4589 if ((bestn
->simdclone
->args
[i
].arg_type
4590 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4591 || (bestn
->simdclone
->args
[i
].arg_type
4592 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4594 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4597 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4598 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4599 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4600 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4601 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4602 tree sll
= arginfo
[i
].simd_lane_linear
4603 ? boolean_true_node
: boolean_false_node
;
4604 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4606 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4607 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4608 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4609 dt, slp_node, cost_vec); */
4615 if (dump_enabled_p ())
4616 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4619 scalar_dest
= gimple_call_lhs (stmt
);
4620 vec_dest
= NULL_TREE
;
4625 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4626 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4627 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4630 rtype
= TREE_TYPE (ratype
);
4634 auto_vec
<vec
<tree
> > vec_oprnds
;
4635 auto_vec
<unsigned> vec_oprnds_i
;
4636 vec_oprnds
.safe_grow_cleared (nargs
, true);
4637 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4638 for (j
= 0; j
< ncopies
; ++j
)
4640 /* Build argument list for the vectorized call. */
4642 vargs
.create (nargs
);
4646 for (i
= 0; i
< nargs
; i
++)
4648 unsigned int k
, l
, m
, o
;
4650 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4651 switch (bestn
->simdclone
->args
[i
].arg_type
)
4653 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4654 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4655 o
= vector_unroll_factor (nunits
,
4656 simd_clone_subparts (atype
));
4657 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4659 if (simd_clone_subparts (atype
)
4660 < simd_clone_subparts (arginfo
[i
].vectype
))
4662 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4663 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4664 / simd_clone_subparts (atype
));
4665 gcc_assert ((k
& (k
- 1)) == 0);
4668 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4669 ncopies
* o
/ k
, op
,
4671 vec_oprnds_i
[i
] = 0;
4672 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4676 vec_oprnd0
= arginfo
[i
].op
;
4677 if ((m
& (k
- 1)) == 0)
4678 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4680 arginfo
[i
].op
= vec_oprnd0
;
4682 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4684 bitsize_int ((m
& (k
- 1)) * prec
));
4686 = gimple_build_assign (make_ssa_name (atype
),
4688 vect_finish_stmt_generation (vinfo
, stmt_info
,
4690 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4694 k
= (simd_clone_subparts (atype
)
4695 / simd_clone_subparts (arginfo
[i
].vectype
));
4696 gcc_assert ((k
& (k
- 1)) == 0);
4697 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4699 vec_alloc (ctor_elts
, k
);
4702 for (l
= 0; l
< k
; l
++)
4704 if (m
== 0 && l
== 0)
4706 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4710 vec_oprnds_i
[i
] = 0;
4711 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4714 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4715 arginfo
[i
].op
= vec_oprnd0
;
4718 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4722 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4726 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4728 = gimple_build_assign (make_ssa_name (atype
),
4730 vect_finish_stmt_generation (vinfo
, stmt_info
,
4732 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4735 vargs
.safe_push (vec_oprnd0
);
4738 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4740 = gimple_build_assign (make_ssa_name (atype
),
4742 vect_finish_stmt_generation (vinfo
, stmt_info
,
4744 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4749 case SIMD_CLONE_ARG_TYPE_MASK
:
4750 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4751 if (bestn
->simdclone
->mask_mode
!= VOIDmode
)
4753 /* FORNOW: this is disabled above. */
4758 tree elt_type
= TREE_TYPE (atype
);
4759 tree one
= fold_convert (elt_type
, integer_one_node
);
4760 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4761 o
= vector_unroll_factor (nunits
,
4762 simd_clone_subparts (atype
));
4763 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4765 if (simd_clone_subparts (atype
)
4766 < simd_clone_subparts (arginfo
[i
].vectype
))
4768 /* The mask type has fewer elements than simdlen. */
4773 else if (simd_clone_subparts (atype
)
4774 == simd_clone_subparts (arginfo
[i
].vectype
))
4776 /* The SIMD clone function has the same number of
4777 elements as the current function. */
4780 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4784 vec_oprnds_i
[i
] = 0;
4786 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4788 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4789 build_vector_from_val (atype
, one
),
4790 build_vector_from_val (atype
, zero
));
4792 = gimple_build_assign (make_ssa_name (atype
),
4794 vect_finish_stmt_generation (vinfo
, stmt_info
,
4796 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4800 /* The mask type has more elements than simdlen. */
4808 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4809 vargs
.safe_push (op
);
4811 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4812 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4817 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4818 &stmts
, true, NULL_TREE
);
4822 edge pe
= loop_preheader_edge (loop
);
4823 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4824 gcc_assert (!new_bb
);
4826 if (arginfo
[i
].simd_lane_linear
)
4828 vargs
.safe_push (arginfo
[i
].op
);
4831 tree phi_res
= copy_ssa_name (op
);
4832 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4833 add_phi_arg (new_phi
, arginfo
[i
].op
,
4834 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4836 = POINTER_TYPE_P (TREE_TYPE (op
))
4837 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4838 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4839 ? sizetype
: TREE_TYPE (op
);
4841 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4843 tree tcst
= wide_int_to_tree (type
, cst
);
4844 tree phi_arg
= copy_ssa_name (op
);
4846 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4847 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4848 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4849 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4851 arginfo
[i
].op
= phi_res
;
4852 vargs
.safe_push (phi_res
);
4857 = POINTER_TYPE_P (TREE_TYPE (op
))
4858 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4859 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4860 ? sizetype
: TREE_TYPE (op
);
4862 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4864 tree tcst
= wide_int_to_tree (type
, cst
);
4865 new_temp
= make_ssa_name (TREE_TYPE (op
));
4867 = gimple_build_assign (new_temp
, code
,
4868 arginfo
[i
].op
, tcst
);
4869 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4870 vargs
.safe_push (new_temp
);
4873 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4874 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4875 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4876 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4877 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4878 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4884 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4888 || known_eq (simd_clone_subparts (rtype
), nunits
));
4890 new_temp
= create_tmp_var (ratype
);
4891 else if (useless_type_conversion_p (vectype
, rtype
))
4892 new_temp
= make_ssa_name (vec_dest
, new_call
);
4894 new_temp
= make_ssa_name (rtype
, new_call
);
4895 gimple_call_set_lhs (new_call
, new_temp
);
4897 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4898 gimple
*new_stmt
= new_call
;
4902 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4905 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4906 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4907 k
= vector_unroll_factor (nunits
,
4908 simd_clone_subparts (vectype
));
4909 gcc_assert ((k
& (k
- 1)) == 0);
4910 for (l
= 0; l
< k
; l
++)
4915 t
= build_fold_addr_expr (new_temp
);
4916 t
= build2 (MEM_REF
, vectype
, t
,
4917 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4920 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4921 bitsize_int (prec
), bitsize_int (l
* prec
));
4922 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4923 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4925 if (j
== 0 && l
== 0)
4926 *vec_stmt
= new_stmt
;
4927 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4931 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4934 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4936 unsigned int k
= (simd_clone_subparts (vectype
)
4937 / simd_clone_subparts (rtype
));
4938 gcc_assert ((k
& (k
- 1)) == 0);
4939 if ((j
& (k
- 1)) == 0)
4940 vec_alloc (ret_ctor_elts
, k
);
4944 o
= vector_unroll_factor (nunits
,
4945 simd_clone_subparts (rtype
));
4946 for (m
= 0; m
< o
; m
++)
4948 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4949 size_int (m
), NULL_TREE
, NULL_TREE
);
4950 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4952 vect_finish_stmt_generation (vinfo
, stmt_info
,
4954 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4955 gimple_assign_lhs (new_stmt
));
4957 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4960 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4961 if ((j
& (k
- 1)) != k
- 1)
4963 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4965 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4966 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4968 if ((unsigned) j
== k
- 1)
4969 *vec_stmt
= new_stmt
;
4970 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4975 tree t
= build_fold_addr_expr (new_temp
);
4976 t
= build2 (MEM_REF
, vectype
, t
,
4977 build_int_cst (TREE_TYPE (t
), 0));
4978 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4979 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4980 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4982 else if (!useless_type_conversion_p (vectype
, rtype
))
4984 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4986 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4987 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4992 *vec_stmt
= new_stmt
;
4993 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4996 for (i
= 0; i
< nargs
; ++i
)
4998 vec
<tree
> oprndsi
= vec_oprnds
[i
];
5003 /* Mark the clone as no longer being a candidate for GC. */
5004 bestn
->gc_candidate
= false;
5006 /* The call in STMT might prevent it from being removed in dce.
5007 We however cannot remove it here, due to the way the ssa name
5008 it defines is mapped to the new definition. So just replace
5009 rhs of the statement with something harmless. */
5017 type
= TREE_TYPE (scalar_dest
);
5018 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
5019 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
5022 new_stmt
= gimple_build_nop ();
5023 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
5024 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
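/* Sketch of a typical use (target permitting): a widening multiply of two
   V8HI operands is emitted as two statements, one per half of the result,

     lo_5 = VEC_WIDEN_MULT_LO_EXPR <va_1, vb_2>;   (V8HI x V8HI -> V4SI)
     hi_6 = VEC_WIDEN_MULT_HI_EXPR <va_1, vb_2>;   (V8HI x V8HI -> V4SI)

   and this helper builds one such half at a time.  */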
static gimple *
vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  When NARROW_SRC_P is true, there's still a conversion after
   narrowing, don't store the vectors in the SLP_NODE or in vector info of
   the scalar statement (or in STMT_VINFO_RELATED_STMT chain).  */
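/* Sketch of a single demotion step (assuming the target has a vec_pack_trunc
   pattern): two V4SI operands are packed into one V8HI result,

     v8hi_7 = VEC_PACK_TRUNC_EXPR <v4si_1, v4si_2>;

   so each recursion level halves the number of vectors in VEC_OPRNDS.  */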
static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, code_helper code,
				       bool narrow_src_p)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
5082 /* Create demotion operation. */
5083 vop0
= (*vec_oprnds
)[i
];
5084 vop1
= (*vec_oprnds
)[i
+ 1];
5085 gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
5086 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
5087 gimple_set_lhs (new_stmt
, new_tmp
);
5088 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5089 if (multi_step_cvt
|| narrow_src_p
)
5090 /* Store the resulting vector for next recursive call,
5091 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5092 (*vec_oprnds
)[i
/2] = new_tmp
;
5095 /* This is the last step of the conversion sequence. Store the
5096 vectors in SLP_NODE or in vector info of the scalar statement
5097 (or in STMT_VINFO_RELATED_STMT chain). */
5099 slp_node
->push_vec_def (new_stmt
);
5101 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5105 /* For multi-step demotion operations we first generate demotion operations
5106 from the source type to the intermediate types, and then combine the
5107 results (stored in VEC_OPRNDS) in demotion operation to the destination
5111 /* At each level of recursion we have half of the operands we had at the
5113 vec_oprnds
->truncate ((i
+1)/2);
5114 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
5116 stmt_info
, vec_dsts
, gsi
,
5117 slp_node
, VEC_PACK_TRUNC_EXPR
,
5121 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */
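/* Sketch of a single promotion step (assuming the target has
   vec_unpacks_lo/hi patterns): one V8HI operand yields two V4SI results,

     lo_3 = VEC_UNPACK_LO_EXPR <v8hi_1>;
     hi_4 = VEC_UNPACK_HI_EXPR <v8hi_1>;

   so each step doubles the number of vectors in VEC_OPRNDS0.  */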
static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					code_helper ch1,
					code_helper ch2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5147 if (op_type
== binary_op
)
5148 vop1
= (*vec_oprnds1
)[i
];
5152 /* Generate the two halves of promotion operation. */
5153 new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5154 op_type
, vec_dest
, gsi
,
5156 new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5157 op_type
, vec_dest
, gsi
,
5159 if (is_gimple_call (new_stmt1
))
5161 new_tmp1
= gimple_call_lhs (new_stmt1
);
5162 new_tmp2
= gimple_call_lhs (new_stmt2
);
5166 new_tmp1
= gimple_assign_lhs (new_stmt1
);
5167 new_tmp2
= gimple_assign_lhs (new_stmt2
);
5170 /* Store the results for the next step. */
5171 vec_tmp
.quick_push (new_tmp1
);
5172 vec_tmp
.quick_push (new_tmp2
);
5175 vec_oprnds0
->release ();
5176 *vec_oprnds0
= vec_tmp
;
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */

static void
vect_create_half_widening_stmts (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1,
				 stmt_vec_info stmt_info, tree vec_dest,
				 gimple_stmt_iterator *gsi,
				 code_helper code1,
				 int op_type)
{
  int i;
  tree vop0, vop1;
  gimple *new_stmt1, *new_stmt2, *new_stmt3;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length ());
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5200 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
5202 gcc_assert (op_type
== binary_op
);
5203 vop1
= (*vec_oprnds1
)[i
];
5205 /* Widen the first vector input. */
5206 out_type
= TREE_TYPE (vec_dest
);
5207 new_tmp1
= make_ssa_name (out_type
);
5208 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5209 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
5210 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5212 /* Widen the second vector input. */
5213 new_tmp2
= make_ssa_name (out_type
);
5214 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5215 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5216 /* Perform the operation. With both vector inputs widened. */
5217 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5221 /* Perform the operation. With the single vector input widened. */
5222 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
5225 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5226 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5227 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5229 /* Store the results for the next step. */
5230 vec_tmp
.quick_push (new_tmp3
);
5233 vec_oprnds0
->release ();
5234 *vec_oprnds0
= vec_tmp
;
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_conversion (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest, cvt_op = NULL_TREE;
  tree op0, op1 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  code_helper code, code1, code2;
  code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  /* For conversions between floating point and integer, there're 2 NARROW
     cases.  NARROW_SRC is for FLOAT_EXPR, means
     integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
     This is safe when the range of the source integer can fit into the lower
     precision.  NARROW_DST is for FIX_TRUNC_EXPR, means
     floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
     For other conversions, when there's narrowing, NARROW_DST is used as
     default.  */
  enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
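  /* Illustration (a sketch; profitability and support are target-specific):
     for "int8_t = (int8_t) double_val" the vectorizer uses NARROW_DST,
     applying FIX_TRUNC_EXPR into a same-width integer vector first and then
     packing the results down to the narrow type.  For
     "float = (float) int64_val" where range analysis shows the values fit in
     32 bits, it can use NARROW_SRC instead: pack the integers down first and
     then apply FLOAT_EXPR on the narrower vector.  */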
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree intermediate_type, cvt_type = NULL_TREE;
  unsigned short fltsz;
5283 /* Is STMT a vectorizable conversion? */
5285 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5288 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5292 gimple
* stmt
= stmt_info
->stmt
;
5293 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5296 if (gimple_get_lhs (stmt
) == NULL_TREE
5297 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5300 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5303 if (is_gimple_assign (stmt
))
5305 code
= gimple_assign_rhs_code (stmt
);
5306 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5308 else if (gimple_call_internal_p (stmt
))
5310 code
= gimple_call_internal_fn (stmt
);
5311 op_type
= gimple_call_num_args (stmt
);
5316 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5317 || code
== WIDEN_LSHIFT_EXPR
5318 || widening_fn_p (code
));
5321 && !CONVERT_EXPR_CODE_P (code
)
5322 && code
!= FIX_TRUNC_EXPR
5323 && code
!= FLOAT_EXPR
)
5326 /* Check types of lhs and rhs. */
5327 scalar_dest
= gimple_get_lhs (stmt
);
5328 lhs_type
= TREE_TYPE (scalar_dest
);
5329 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5331 /* Check the operands of the operation. */
5332 slp_tree slp_op0
, slp_op1
= NULL
;
5333 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5334 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5336 if (dump_enabled_p ())
5337 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5338 "use not simple.\n");
5342 rhs_type
= TREE_TYPE (op0
);
5343 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5344 && !((INTEGRAL_TYPE_P (lhs_type
)
5345 && INTEGRAL_TYPE_P (rhs_type
))
5346 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5347 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5350 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5351 && ((INTEGRAL_TYPE_P (lhs_type
)
5352 && !type_has_mode_precision_p (lhs_type
))
5353 || (INTEGRAL_TYPE_P (rhs_type
)
5354 && !type_has_mode_precision_p (rhs_type
))))
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5358 "type conversion to/from bit-precision unsupported."
5363 if (op_type
== binary_op
)
5365 gcc_assert (code
== WIDEN_MULT_EXPR
5366 || code
== WIDEN_LSHIFT_EXPR
5367 || widening_fn_p (code
));
5369 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5370 gimple_call_arg (stmt
, 0);
5372 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5373 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5375 if (dump_enabled_p ())
5376 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5377 "use not simple.\n");
5380 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5383 vectype_in
= vectype1_in
;
5386 /* If op0 is an external or constant def, infer the vector type
5387 from the scalar type. */
5389 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5391 gcc_assert (vectype_in
);
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5396 "no vectype for scalar type %T\n", rhs_type
);
5401 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5402 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5406 "can't convert between boolean and non "
5407 "boolean vectors %T\n", rhs_type
);
5412 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5413 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5414 if (known_eq (nunits_out
, nunits_in
))
5419 else if (multiple_p (nunits_out
, nunits_in
))
5420 modifier
= NARROW_DST
;
5423 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5427 /* Multiple types in SLP are handled by creating the appropriate number of
5428 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5432 else if (modifier
== NARROW_DST
)
5433 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5435 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5437 /* Sanity check: make sure that at least one copy of the vectorized stmt
5438 needs to be generated. */
5439 gcc_assert (ncopies
>= 1);
5441 bool found_mode
= false;
5442 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5443 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5444 opt_scalar_mode rhs_mode_iter
;
5446 /* Supportable by target? */
5450 if (code
!= FIX_TRUNC_EXPR
5451 && code
!= FLOAT_EXPR
5452 && !CONVERT_EXPR_CODE_P (code
))
5454 gcc_assert (code
.is_tree_code ());
5455 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5462 /* For conversions between float and integer types try whether
5463 we can use intermediate signed integer types to support the
5465 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
5466 && (code
== FLOAT_EXPR
||
5467 (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
5469 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
5470 bool float_expr_p
= code
== FLOAT_EXPR
;
5471 unsigned short target_size
;
5472 scalar_mode intermediate_mode
;
5475 intermediate_mode
= lhs_mode
;
5476 target_size
= GET_MODE_SIZE (rhs_mode
);
5480 target_size
= GET_MODE_SIZE (lhs_mode
);
5481 if (!int_mode_for_size
5482 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
5485 code1
= float_expr_p
? code
: NOP_EXPR
;
5486 codecvt1
= float_expr_p
? NOP_EXPR
: code
;
5487 opt_scalar_mode mode_iter
;
5488 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
5490 intermediate_mode
= mode_iter
.require ();
5492 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
5495 scalar_mode cvt_mode
;
5496 if (!int_mode_for_size
5497 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
5500 cvt_type
= build_nonstandard_integer_type
5501 (GET_MODE_BITSIZE (cvt_mode
), 0);
5503 /* Check if the intermediate type can hold OP0's range.
5504 When converting from float to integer this is not necessary
5505 because values that do not fit the (smaller) target type are
5506 unspecified anyway. */
5507 if (demotion
&& float_expr_p
)
5509 wide_int op_min_value
, op_max_value
;
5510 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5513 if (cvt_type
== NULL_TREE
5514 || (wi::min_precision (op_max_value
, SIGNED
)
5515 > TYPE_PRECISION (cvt_type
))
5516 || (wi::min_precision (op_min_value
, SIGNED
)
5517 > TYPE_PRECISION (cvt_type
)))
5521 cvt_type
= get_vectype_for_scalar_type (vinfo
, cvt_type
, slp_node
);
5522 /* This should only happened for SLP as long as loop vectorizer
5523 only supports same-sized vector. */
5524 if (cvt_type
== NULL_TREE
5525 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nunits_in
)
5526 || !supportable_convert_operation ((tree_code
) code1
,
5529 || !supportable_convert_operation ((tree_code
) codecvt1
,
5541 interm_types
.safe_push (cvt_type
);
5542 cvt_type
= NULL_TREE
;
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5552 "conversion not supported by target.\n");
5556 if (known_eq (nunits_in
, nunits_out
))
5558 if (!(code
.is_tree_code ()
5559 && supportable_half_widening_operation ((tree_code
) code
,
5560 vectype_out
, vectype_in
,
5564 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5567 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5568 vectype_out
, vectype_in
, &code1
,
5569 &code2
, &multi_step_cvt
,
5572 /* Binary widening operation can only be supported directly by the
5574 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5578 if (code
!= FLOAT_EXPR
5579 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5582 fltsz
= GET_MODE_SIZE (lhs_mode
);
5583 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5585 rhs_mode
= rhs_mode_iter
.require ();
5586 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5590 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5591 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5592 if (cvt_type
== NULL_TREE
)
5595 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5598 gcc_assert (code
.is_tree_code ());
5599 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5604 else if (!supportable_widening_operation (vinfo
, code
,
5605 stmt_info
, vectype_out
,
5606 cvt_type
, &codecvt1
,
5607 &codecvt2
, &multi_step_cvt
,
5611 gcc_assert (multi_step_cvt
== 0);
5613 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5616 &code2
, &multi_step_cvt
,
5627 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5628 codecvt2
= ERROR_MARK
;
5632 interm_types
.safe_push (cvt_type
);
5633 cvt_type
= NULL_TREE
;
5638 gcc_assert (op_type
== unary_op
);
5639 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5640 &code1
, &multi_step_cvt
,
5644 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5647 if (code
== FIX_TRUNC_EXPR
)
5650 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5651 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5652 if (cvt_type
== NULL_TREE
)
5654 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5659 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5660 &code1
, &multi_step_cvt
,
5664 /* If op0 can be represented with low precision integer,
5665 truncate it to cvt_type and the do FLOAT_EXPR. */
5666 else if (code
== FLOAT_EXPR
)
5668 wide_int op_min_value
, op_max_value
;
5669 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5673 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5674 if (cvt_type
== NULL_TREE
5675 || (wi::min_precision (op_max_value
, SIGNED
)
5676 > TYPE_PRECISION (cvt_type
))
5677 || (wi::min_precision (op_min_value
, SIGNED
)
5678 > TYPE_PRECISION (cvt_type
)))
5681 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5682 if (cvt_type
== NULL_TREE
)
5684 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5685 &code1
, &multi_step_cvt
,
5688 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5692 modifier
= NARROW_SRC
;
5703 if (!vec_stmt
) /* transformation not required. */
5706 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5707 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5709 if (dump_enabled_p ())
5710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5711 "incompatible vector types for invariants\n");
5714 DUMP_VECT_SCOPE ("vectorizable_conversion");
5715 if (modifier
== NONE
)
5717 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5718 vect_model_simple_cost (vinfo
, stmt_info
,
5719 ncopies
* (1 + multi_step_cvt
),
5720 dt
, ndts
, slp_node
, cost_vec
);
5722 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5724 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5725 /* The final packing step produces one vector result per copy. */
5726 unsigned int nvectors
5727 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5728 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5729 multi_step_cvt
, cost_vec
,
5734 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5735 /* The initial unpacking step produces two vector results
5736 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5737 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
	  unsigned int nvectors
	      ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
					      multi_step_cvt, cost_vec,
      interm_types.release ();
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  bool widen_or_narrow_float_p
    = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
  vec_dest = vect_create_destination_var (scalar_dest,
					  widen_or_narrow_float_p
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);
  for (i = interm_types.length () - 1;
       interm_types.iterate (i, &intermediate_type); i--)
      vec_dest = vect_create_destination_var (scalar_dest,
      vec_dsts.quick_push (vec_dest);

    vec_dest = vect_create_destination_var (scalar_dest,
					    widen_or_narrow_float_p
					    ? vectype_out : cvt_type);

  if (modifier == WIDEN)
  else if (modifier == NARROW_SRC || modifier == NARROW_DST)
      ninputs = vect_pow2 (multi_step_cvt);
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
      /* vec_dest is intermediate type operand when multi_step_cvt.  */
	  vec_dest = vec_dsts[0];

      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  /* Arguments are ready, create the new vector stmt.  */
	      gcc_assert (multi_step_cvt == 1);
	      new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
	      new_temp = make_ssa_name (cvt_op, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  new_stmt = vect_gimple_build (vec_dest, code1, vop0);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    slp_node->push_vec_def (new_stmt);
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
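      /* E.g. with VF == 16 and four elements per vector, four copies of the
	 widening statement are generated, each reading its own set of input
	 vectors.  */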
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
      if (code == WIDEN_LSHIFT_EXPR)
	  int oprnds_size = vec_oprnds0.length ();
	  vec_oprnds1.create (oprnds_size);
	  for (i = 0; i < oprnds_size; ++i)
	    vec_oprnds1.quick_push (op1);

      /* Arguments are ready.  Create the new vector stmts.  */
      for (i = multi_step_cvt; i >= 0; i--)
	  tree this_dest = vec_dsts[i];
	  code_helper c1 = code1, c2 = code2;
	  if (i == 0 && codecvt2 != ERROR_MARK)
	  if (known_eq (nunits_out, nunits_in))
	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
					     stmt_info, this_dest, gsi, c1,
	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
						    &vec_oprnds1, stmt_info,

      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    new_stmt = SSA_NAME_DEF_STMT (vop0);
	    slp_node->push_vec_def (new_stmt);
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
      /* Arguments are ready.  Create the new vector stmts.  */
      if (cvt_type && modifier == NARROW_DST)
	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    new_temp = make_ssa_name (vec_dest);
	    gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    vec_oprnds0[i] = new_temp;

      vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
					     stmt_info, vec_dsts, gsi,
					     modifier == NARROW_SRC);
      /* After demoting op0 to cvt_type, convert it to dest.  */
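      /* This pairs with the analysis above that chose to truncate op0 to
	 cvt_type first and only then apply FLOAT_EXPR (modifier was set to
	 NARROW_SRC for that case).  */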
      if (cvt_type && code == FLOAT_EXPR)
	  for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
	      /* Arguments are ready, create the new vector stmt.  */
	      gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
		= vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      /* This is the last step of the conversion sequence.  Store the
		 vectors in SLP_NODE or in vector info of the scalar statement
		 (or in STMT_VINFO_RELATED_STMT chain).  */
		slp_node->push_vec_def (new_stmt);
		STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);

    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

vect_nop_conversion_p (stmt_vec_info stmt_info)
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  vec<tree> vec_oprnds = vNULL;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum tree_code code;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is vectorizable assignment?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  if (STMT_VINFO_DATA_REF (stmt_info))

  code = gimple_assign_rhs_code (stmt);
  if (!(gimple_assign_single_p (stmt)
	|| code == PAREN_EXPR
	|| CONVERT_EXPR_CODE_P (code)))

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			   &dt[0], &vectype_in))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");

    vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))

  if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't convert between boolean and non "
			 "boolean vectors %T\n", TREE_TYPE (op));

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
	  || (INTEGRAL_TYPE_P (TREE_TYPE (op))
	      && !type_has_mode_precision_p (TREE_TYPE (op))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && INTEGRAL_TYPE_P (TREE_TYPE (op))
	   && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
	       > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op))))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "

  if (!vec_stmt)	/* transformation not required.  */
	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      if (!vect_nop_conversion_p (stmt_info))
	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
      if (CONVERT_EXPR_CODE_P (code)
	  || code == VIEW_CONVERT_EXPR)
	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
      gassign *new_stmt = gimple_build_assign (vec_dest, vop);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	slp_node->push_vec_def (new_stmt);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);

    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
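/* (The body below mirrors the optab checks done in vectorizable_shift:
   the vector-shifted-by-scalar optab is tried first and the
   vector-shifted-by-vector optab is used as a fallback.)  */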
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)

  machine_mode vec_mode;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);

  optab = optab_for_tree_code (code, vectype, optab_scalar);
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
      optab = optab_for_tree_code (code, vectype, optab_vector);
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
6193 vectorizable_shift (vec_info
*vinfo
,
6194 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6195 gimple
**vec_stmt
, slp_tree slp_node
,
6196 stmt_vector_for_cost
*cost_vec
)
6200 tree op0
, op1
= NULL
;
6201 tree vec_oprnd1
= NULL_TREE
;
6203 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6204 enum tree_code code
;
6205 machine_mode vec_mode
;
6209 machine_mode optab_op2_mode
;
6210 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
6212 poly_uint64 nunits_in
;
6213 poly_uint64 nunits_out
;
6218 vec
<tree
> vec_oprnds0
= vNULL
;
6219 vec
<tree
> vec_oprnds1
= vNULL
;
6222 bool scalar_shift_arg
= true;
6223 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6224 bool incompatible_op1_vectype_p
= false;
6226 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6229 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6230 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
6234 /* Is STMT a vectorizable binary/unary operation? */
6235 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6239 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6242 code
= gimple_assign_rhs_code (stmt
);
6244 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6245 || code
== RROTATE_EXPR
))
6248 scalar_dest
= gimple_assign_lhs (stmt
);
6249 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6250 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6254 "bit-precision shifts not supported.\n");
6259 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6260 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6264 "use not simple.\n");
6267 /* If op0 is an external or constant def, infer the vector type
6268 from the scalar type. */
6270 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
6272 gcc_assert (vectype
);
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6277 "no vectype for scalar type\n");
6281 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6282 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6283 if (maybe_ne (nunits_out
, nunits_in
))
6286 stmt_vec_info op1_def_stmt_info
;
6288 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
6289 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
6291 if (dump_enabled_p ())
6292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6293 "use not simple.\n");
6297 /* Multiple types in SLP are handled by creating the appropriate number of
6298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6303 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6305 gcc_assert (ncopies
>= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
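  /* E.g. a[i] << b[i] needs the vector/vector form, while a[i] << 3 or a
     shift by a loop-invariant scalar can use the vector/scalar form.  */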
6310 if ((dt
[1] == vect_internal_def
6311 || dt
[1] == vect_induction_def
6312 || dt
[1] == vect_nested_cycle
)
6314 scalar_shift_arg
= false;
6315 else if (dt
[1] == vect_constant_def
6316 || dt
[1] == vect_external_def
6317 || dt
[1] == vect_internal_def
)
6319 /* In SLP, need to check whether the shift count is the same,
6320 in loops if it is a constant or invariant, it is always
6324 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
6325 stmt_vec_info slpstmt_info
;
6327 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
6329 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
6330 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
6331 scalar_shift_arg
= false;
6334 /* For internal SLP defs we have to make sure we see scalar stmts
6335 for all vector elements.
6336 ??? For different vectors we could resort to a different
6337 scalar shift operand but code-generation below simply always
6339 if (dt
[1] == vect_internal_def
6340 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
6342 scalar_shift_arg
= false;
6345 /* If the shift amount is computed by a pattern stmt we cannot
6346 use the scalar amount directly thus give up and use a vector
6348 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
6349 scalar_shift_arg
= false;
6353 if (dump_enabled_p ())
6354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6355 "operand mode requires invariant argument.\n");
6359 /* Vector shifted by vector. */
6360 bool was_scalar_shift_arg
= scalar_shift_arg
;
6361 if (!scalar_shift_arg
)
6363 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_NOTE
, vect_location
,
6366 "vector/vector shift/rotate found.\n");
6369 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
6371 incompatible_op1_vectype_p
6372 = (op1_vectype
== NULL_TREE
6373 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
6374 TYPE_VECTOR_SUBPARTS (vectype
))
6375 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
6376 if (incompatible_op1_vectype_p
6378 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
6379 || slp_op1
->refcnt
!= 1))
6381 if (dump_enabled_p ())
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6383 "unusable type for last operand in"
6384 " vector/vector shift/rotate.\n");
6388 /* See if the machine has a vector shifted by scalar insn and if not
6389 then see if it has a vector shifted by vector insn. */
6392 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6394 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
6396 if (dump_enabled_p ())
6397 dump_printf_loc (MSG_NOTE
, vect_location
,
6398 "vector/scalar shift/rotate found.\n");
6402 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6404 && (optab_handler (optab
, TYPE_MODE (vectype
))
6405 != CODE_FOR_nothing
))
6407 scalar_shift_arg
= false;
6409 if (dump_enabled_p ())
6410 dump_printf_loc (MSG_NOTE
, vect_location
,
6411 "vector/vector shift/rotate found.\n");
6414 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
	  /* Unlike the other binary operators, shifts/rotates have
	     the rhs being int, instead of the same type as the lhs,
	     so make sure the scalar is the right type if we are
	     dealing with vectors of long long/long/short/char.  */
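	  /* E.g. a vector of long long shifted by an int amount needs the
	     amount converted to long long before it can be used as the
	     per-lane shift operand.  */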
6422 incompatible_op1_vectype_p
6424 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
6426 if (incompatible_op1_vectype_p
6427 && dt
[1] == vect_internal_def
)
6429 if (dump_enabled_p ())
6430 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6431 "unusable type for last operand in"
6432 " vector/vector shift/rotate.\n");
6439 /* Supportable by target? */
6442 if (dump_enabled_p ())
6443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6447 vec_mode
= TYPE_MODE (vectype
);
6448 icode
= (int) optab_handler (optab
, vec_mode
);
6449 if (icode
== CODE_FOR_nothing
)
6451 if (dump_enabled_p ())
6452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6453 "op not supported by target.\n");
6456 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6457 if (vect_emulated_vector_p (vectype
))
6460 if (!vec_stmt
) /* transformation not required. */
6463 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6464 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
6465 && (!incompatible_op1_vectype_p
6466 || dt
[1] == vect_constant_def
)
6467 && !vect_maybe_update_slp_op_vectype
6469 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6471 if (dump_enabled_p ())
6472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6473 "incompatible vector types for invariants\n");
6476 /* Now adjust the constant shift amount in place. */
6478 && incompatible_op1_vectype_p
6479 && dt
[1] == vect_constant_def
)
6481 for (unsigned i
= 0;
6482 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6484 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6485 = fold_convert (TREE_TYPE (vectype
),
6486 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6487 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6491 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6492 DUMP_VECT_SCOPE ("vectorizable_shift");
6493 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6494 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_NOTE
, vect_location
,
6502 "transform binary/unary operation.\n");
6504 if (incompatible_op1_vectype_p
&& !slp_node
)
6506 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6507 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6508 if (dt
[1] != vect_constant_def
)
6509 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6510 TREE_TYPE (vectype
), NULL
);
6514 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6516 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
6518 /* Vector shl and shr insn patterns can be defined with scalar
6519 operand 2 (shift operand). In this case, use constant or loop
6520 invariant op1 directly, without extending it to vector mode
6522 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6523 if (!VECTOR_MODE_P (optab_op2_mode
))
6525 if (dump_enabled_p ())
6526 dump_printf_loc (MSG_NOTE
, vect_location
,
6527 "operand 1 using scalar mode.\n");
6529 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6530 vec_oprnds1
.quick_push (vec_oprnd1
);
6531 /* Store vec_oprnd1 for every vector stmt to be created.
6532 We check during the analysis that all the shift arguments
6534 TODO: Allow different constants for different vector
6535 stmts generated for an SLP instance. */
6537 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6538 vec_oprnds1
.quick_push (vec_oprnd1
);
6541 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6543 if (was_scalar_shift_arg
)
6545 /* If the argument was the same in all lanes create
6546 the correctly typed vector shift amount directly. */
6547 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6548 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6549 !loop_vinfo
? gsi
: NULL
);
6550 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6551 !loop_vinfo
? gsi
: NULL
);
6552 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6553 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6554 vec_oprnds1
.quick_push (vec_oprnd1
);
6556 else if (dt
[1] == vect_constant_def
)
6557 /* The constant shift amount has been adjusted in place. */
6560 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6563 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6564 (a special case for certain kind of vector shifts); otherwise,
6565 operand 1 should be of a vector type (the usual case). */
6566 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6568 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6570 /* Arguments are ready. Create the new vector stmt. */
6571 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
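      /* The analysis above only accepted this case when every lane uses the
	 same shift amount, so lane 0 is representative.  */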
6575 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6577 vop1
= vec_oprnds1
[0];
6578 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6580 = gimple_build_assign (new_temp
,
6581 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6583 TYPE_SIZE (TREE_TYPE (new_temp
)),
6584 bitsize_zero_node
));
6585 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6589 vop1
= vec_oprnds1
[i
];
6590 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6591 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6592 gimple_assign_set_lhs (new_stmt
, new_temp
);
6593 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6595 slp_node
->push_vec_def (new_stmt
);
6597 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6601 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6603 vec_oprnds0
.release ();
6604 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
6618 vectorizable_operation (vec_info
*vinfo
,
6619 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6620 gimple
**vec_stmt
, slp_tree slp_node
,
6621 stmt_vector_for_cost
*cost_vec
)
6625 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6627 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6628 enum tree_code code
, orig_code
;
6629 machine_mode vec_mode
;
6633 bool target_support_p
;
6634 enum vect_def_type dt
[3]
6635 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6637 poly_uint64 nunits_in
;
6638 poly_uint64 nunits_out
;
6640 int ncopies
, vec_num
;
6642 vec
<tree
> vec_oprnds0
= vNULL
;
6643 vec
<tree
> vec_oprnds1
= vNULL
;
6644 vec
<tree
> vec_oprnds2
= vNULL
;
6645 tree vop0
, vop1
, vop2
;
6646 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6648 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6651 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6655 /* Is STMT a vectorizable binary/unary operation? */
6656 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6660 /* Loads and stores are handled in vectorizable_{load,store}. */
6661 if (STMT_VINFO_DATA_REF (stmt_info
))
6664 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6666 /* Shifts are handled in vectorizable_shift. */
6667 if (code
== LSHIFT_EXPR
6668 || code
== RSHIFT_EXPR
6669 || code
== LROTATE_EXPR
6670 || code
== RROTATE_EXPR
)
6673 /* Comparisons are handled in vectorizable_comparison. */
6674 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6677 /* Conditions are handled in vectorizable_condition. */
6678 if (code
== COND_EXPR
)
  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
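  /* I.e. POINTER_PLUS_EXPR is vectorized as PLUS_EXPR and
     POINTER_DIFF_EXPR as MINUS_EXPR; the POINTER_DIFF_EXPR result is
     VIEW_CONVERTed back to the signed result type further below.  */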
6683 if (code
== POINTER_PLUS_EXPR
)
6685 if (code
== POINTER_DIFF_EXPR
)
6688 /* Support only unary or binary operations. */
6689 op_type
= TREE_CODE_LENGTH (code
);
6690 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6692 if (dump_enabled_p ())
6693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6694 "num. args = %d (not unary/binary/ternary op).\n",
6699 scalar_dest
= gimple_assign_lhs (stmt
);
6700 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6702 /* Most operations cannot handle bit-precision types without extra
6704 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6706 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6707 /* Exception are bitwise binary operations. */
6708 && code
!= BIT_IOR_EXPR
6709 && code
!= BIT_XOR_EXPR
6710 && code
!= BIT_AND_EXPR
)
6712 if (dump_enabled_p ())
6713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6714 "bit-precision arithmetic not supported.\n");
6719 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6720 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6722 if (dump_enabled_p ())
6723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6724 "use not simple.\n");
6727 bool is_invariant
= (dt
[0] == vect_external_def
6728 || dt
[0] == vect_constant_def
);
6729 /* If op0 is an external or constant def, infer the vector type
6730 from the scalar type. */
6733 /* For boolean type we cannot determine vectype by
6734 invariant value (don't know whether it is a vector
6735 of booleans or vector of integers). We use output
6736 vectype because operations on boolean don't change
6738 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6740 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6742 if (dump_enabled_p ())
6743 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6744 "not supported operation on bool value.\n");
6747 vectype
= vectype_out
;
6750 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6754 gcc_assert (vectype
);
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6759 "no vectype for scalar type %T\n",
6765 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6766 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6767 if (maybe_ne (nunits_out
, nunits_in
))
6770 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6771 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6772 if (op_type
== binary_op
|| op_type
== ternary_op
)
6774 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6775 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6777 if (dump_enabled_p ())
6778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6779 "use not simple.\n");
6782 is_invariant
&= (dt
[1] == vect_external_def
6783 || dt
[1] == vect_constant_def
);
6785 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6788 if (op_type
== ternary_op
)
6790 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6791 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6793 if (dump_enabled_p ())
6794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6795 "use not simple.\n");
6798 is_invariant
&= (dt
[2] == vect_external_def
6799 || dt
[2] == vect_constant_def
);
6801 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
6805 /* Multiple types in SLP are handled by creating the appropriate number of
6806 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6811 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6815 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6819 gcc_assert (ncopies
>= 1);
6821 /* Reject attempts to combine mask types with nonmask types, e.g. if
6822 we have an AND between a (nonmask) boolean loaded from memory and
6823 a (mask) boolean result of a comparison.
6825 TODO: We could easily fix these cases up using pattern statements. */
6826 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6827 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6828 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6830 if (dump_enabled_p ())
6831 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6832 "mixed mask and nonmask vector types\n");
6836 /* Supportable by target? */
6838 vec_mode
= TYPE_MODE (vectype
);
6839 if (code
== MULT_HIGHPART_EXPR
)
6840 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6843 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6846 if (dump_enabled_p ())
6847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6851 target_support_p
= (optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
6852 || optab_libfunc (optab
, vec_mode
));
6855 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6856 if (!target_support_p
|| using_emulated_vectors_p
)
6858 if (dump_enabled_p ())
6859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6860 "op not supported by target.\n");
      /* When vec_mode is not a vector mode and we verified that ops we
	 do not have to lower (like AND) are natively supported, let
	 those through even when the mode isn't word_mode.  For ops we
	 have to lower, the lowering code assumes we are dealing with
	 word_mode.  */
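      /* E.g. a natively supported AND on an emulated vector is accepted
	 regardless of the vector size, while PLUS/MINUS/NEGATE, which are
	 lowered below, are only handled when the vector occupies exactly
	 one word.  */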
6866 if ((((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6867 || !target_support_p
)
6868 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6869 /* Check only during analysis. */
6870 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6872 if (dump_enabled_p ())
6873 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6876 if (dump_enabled_p ())
6877 dump_printf_loc (MSG_NOTE
, vect_location
,
6878 "proceeding using word mode.\n");
6879 using_emulated_vectors_p
= true;
6882 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6883 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6884 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
6885 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6886 internal_fn cond_len_fn
= get_conditional_len_internal_fn (code
);
6888 /* If operating on inactive elements could generate spurious traps,
6889 we need to restrict the operation to active lanes. Note that this
6890 specifically doesn't apply to unhoisted invariants, since they
6891 operate on the same value for every lane.
6893 Similarly, if this operation is part of a reduction, a fully-masked
6894 loop should only change the active lanes of the reduction chain,
6895 keeping the inactive lanes as-is. */
6896 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6899 if (!vec_stmt
) /* transformation not required. */
6902 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6903 && mask_out_inactive
)
6905 if (cond_len_fn
!= IFN_LAST
6906 && direct_internal_fn_supported_p (cond_len_fn
, vectype
,
6907 OPTIMIZE_FOR_SPEED
))
6908 vect_record_loop_len (loop_vinfo
, lens
, ncopies
* vec_num
, vectype
,
6910 else if (cond_fn
!= IFN_LAST
6911 && direct_internal_fn_supported_p (cond_fn
, vectype
,
6912 OPTIMIZE_FOR_SPEED
))
6913 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6917 if (dump_enabled_p ())
6918 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6919 "can't use a fully-masked loop because no"
6920 " conditional operation is available.\n");
6921 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6925 /* Put types on constant and invariant SLP children. */
6927 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6928 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6929 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6931 if (dump_enabled_p ())
6932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6933 "incompatible vector types for invariants\n");
6937 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6938 DUMP_VECT_SCOPE ("vectorizable_operation");
6939 vect_model_simple_cost (vinfo
, stmt_info
,
6940 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6941 if (using_emulated_vectors_p
)
6943 /* The above vect_model_simple_cost call handles constants
6944 in the prologue and (mis-)costs one of the stmts as
6945 vector stmt. See below for the actual lowering that will
6948 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6961 /* Bit operations do not have extra cost and are accounted
6962 as vector stmt by vect_model_simple_cost. */
6968 /* We also need to materialize two large constants. */
6969 record_stmt_cost (cost_vec
, 2, scalar_stmt
, stmt_info
,
6971 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
,
6980 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_NOTE
, vect_location
,
6982 "transform binary/unary operation.\n");
6984 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6985 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
6987 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6988 vectors with unsigned elements, but the result is signed. So, we
6989 need to compute the MINUS_EXPR into vectype temporary and
6990 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6991 tree vec_cvt_dest
= NULL_TREE
;
6992 if (orig_code
== POINTER_DIFF_EXPR
)
6994 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6995 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6999 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0

     step2: vectorize stmt S2 (done here):
     To vectorize stmt S2 we first need to find the relevant vector
     def for the first operand 'x'.  This is, as usual, obtained from
     the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
     that defines 'x' (S1).  This way we find the stmt VS1_0, and the
     relevant vector def 'vx0'.  Having found 'vx0' we can generate
     the vector stmt VS2_0, and as usual, record it in the
     STMT_VINFO_VEC_STMT of stmt S2.
     When creating the second copy (VS2_1), we obtain the relevant vector
     def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
     stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
     vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
     pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
     Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
     chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0
     VS2_0:  vz0 = vx0 + v1     VS2_1           -
     VS2_1:  vz1 = vx1 + v1     VS2_2           -
     VS2_2:  vz2 = vx2 + v1     VS2_3           -
     VS2_3:  vz3 = vx3 + v1     -               -
     S2:     z = x + 1          -               VS2_0  */
7054 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
7055 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
7056 /* Arguments are ready. Create the new vector stmt. */
7057 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
7059 gimple
*new_stmt
= NULL
;
7060 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
7061 ? vec_oprnds1
[i
] : NULL_TREE
);
7062 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
7063 if (using_emulated_vectors_p
7064 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
	  /* Lower the operation.  This follows vector lowering.  */
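	  /* The idea (for PLUS_EXPR/MINUS_EXPR/NEGATE_EXPR on an emulated
	     vector): do the arithmetic on the whole vector reinterpreted as
	     one word, but first mask off the most significant bit of every
	     element (LOW_BITS) so that no carry or borrow can cross an
	     element boundary, and then patch the element sign bits back in
	     with an XOR derived from HIGH_BITS.  */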
7067 unsigned int width
= vector_element_bits (vectype
);
7068 tree inner_type
= TREE_TYPE (vectype
);
7070 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
7071 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
7072 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
7074 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
7075 tree wvop0
= make_ssa_name (word_type
);
7076 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
7077 build1 (VIEW_CONVERT_EXPR
,
7079 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7080 tree result_low
, signs
;
7081 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
7083 tree wvop1
= make_ssa_name (word_type
);
7084 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
7085 build1 (VIEW_CONVERT_EXPR
,
7087 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7088 signs
= make_ssa_name (word_type
);
7089 new_stmt
= gimple_build_assign (signs
,
7090 BIT_XOR_EXPR
, wvop0
, wvop1
);
7091 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7092 tree b_low
= make_ssa_name (word_type
);
7093 new_stmt
= gimple_build_assign (b_low
,
7094 BIT_AND_EXPR
, wvop1
, low_bits
);
7095 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7096 tree a_low
= make_ssa_name (word_type
);
7097 if (code
== PLUS_EXPR
)
7098 new_stmt
= gimple_build_assign (a_low
,
7099 BIT_AND_EXPR
, wvop0
, low_bits
);
7101 new_stmt
= gimple_build_assign (a_low
,
7102 BIT_IOR_EXPR
, wvop0
, high_bits
);
7103 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7104 if (code
== MINUS_EXPR
)
7106 new_stmt
= gimple_build_assign (NULL_TREE
,
7107 BIT_NOT_EXPR
, signs
);
7108 signs
= make_ssa_name (word_type
);
7109 gimple_assign_set_lhs (new_stmt
, signs
);
7110 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7112 new_stmt
= gimple_build_assign (NULL_TREE
,
7113 BIT_AND_EXPR
, signs
, high_bits
);
7114 signs
= make_ssa_name (word_type
);
7115 gimple_assign_set_lhs (new_stmt
, signs
);
7116 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7117 result_low
= make_ssa_name (word_type
);
7118 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
7119 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7123 tree a_low
= make_ssa_name (word_type
);
7124 new_stmt
= gimple_build_assign (a_low
,
7125 BIT_AND_EXPR
, wvop0
, low_bits
);
7126 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7127 signs
= make_ssa_name (word_type
);
7128 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
7129 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7130 new_stmt
= gimple_build_assign (NULL_TREE
,
7131 BIT_AND_EXPR
, signs
, high_bits
);
7132 signs
= make_ssa_name (word_type
);
7133 gimple_assign_set_lhs (new_stmt
, signs
);
7134 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7135 result_low
= make_ssa_name (word_type
);
7136 new_stmt
= gimple_build_assign (result_low
,
7137 MINUS_EXPR
, high_bits
, a_low
);
7138 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7140 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
7142 result_low
= make_ssa_name (word_type
);
7143 gimple_assign_set_lhs (new_stmt
, result_low
);
7144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7145 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
7146 build1 (VIEW_CONVERT_EXPR
,
7147 vectype
, result_low
));
7148 new_temp
= make_ssa_name (vectype
);
7149 gimple_assign_set_lhs (new_stmt
, new_temp
);
7150 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7152 else if ((masked_loop_p
|| len_loop_p
) && mask_out_inactive
)
7156 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7157 vec_num
* ncopies
, vectype
, i
);
7160 mask
= build_minus_one_cst (truth_type_for (vectype
));
7161 auto_vec
<tree
> vops (6);
7162 vops
.quick_push (mask
);
7163 vops
.quick_push (vop0
);
7165 vops
.quick_push (vop1
);
7167 vops
.quick_push (vop2
);
7170 /* Perform the operation on active elements only and take
7171 inactive elements from the reduction chain input. */
7173 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
7177 auto else_value
= targetm
.preferred_else_value
7178 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
7179 vops
.quick_push (else_value
);
7183 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
7184 vec_num
* ncopies
, vectype
, i
, 1);
7186 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
7187 tree bias
= build_int_cst (intQI_type_node
, biasval
);
7188 vops
.quick_push (len
);
7189 vops
.quick_push (bias
);
7192 = gimple_build_call_internal_vec (masked_loop_p
? cond_fn
7195 new_temp
= make_ssa_name (vec_dest
, call
);
7196 gimple_call_set_lhs (call
, new_temp
);
7197 gimple_call_set_nothrow (call
, true);
7198 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7203 tree mask
= NULL_TREE
;
      /* When combining two masks, check if either of them is elsewhere
	 combined with a loop mask; if so, we can mark the new combined
	 mask as not needing to be combined with a loop mask again.  */
7208 && code
== BIT_AND_EXPR
7209 && VECTOR_BOOLEAN_TYPE_P (vectype
))
7211 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
7214 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7215 vec_num
* ncopies
, vectype
, i
);
7217 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7221 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
7224 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7225 vec_num
* ncopies
, vectype
, i
);
7227 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7232 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
7233 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7234 gimple_assign_set_lhs (new_stmt
, new_temp
);
7235 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7236 if (using_emulated_vectors_p
)
7237 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
7239 /* Enter the combined value into the vector cond hash so we don't
7240 AND it with a loop mask again. */
7242 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
7247 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
7248 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
7250 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
7251 gimple_assign_set_lhs (new_stmt
, new_temp
);
7252 vect_finish_stmt_generation (vinfo
, stmt_info
,
7257 slp_node
->push_vec_def (new_stmt
);
7259 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7263 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7265 vec_oprnds0
.release ();
7266 vec_oprnds1
.release ();
7267 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to determine the base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
7333 /* Function scan_operand_equal_p.
7335 Helper function for check_scan_store. Compare two references
7336 with .GOMP_SIMD_LANE bases. */
7339 scan_operand_equal_p (tree ref1
, tree ref2
)
7341 tree ref
[2] = { ref1
, ref2
};
7342 poly_int64 bitsize
[2], bitpos
[2];
7343 tree offset
[2], base
[2];
7344 for (int i
= 0; i
< 2; ++i
)
7347 int unsignedp
, reversep
, volatilep
= 0;
7348 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
7349 &offset
[i
], &mode
, &unsignedp
,
7350 &reversep
, &volatilep
);
7351 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
7353 if (TREE_CODE (base
[i
]) == MEM_REF
7354 && offset
[i
] == NULL_TREE
7355 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
7357 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
7358 if (is_gimple_assign (def_stmt
)
7359 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
7360 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
7361 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
7363 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
7365 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
7366 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
7371 if (!operand_equal_p (base
[0], base
[1], 0))
7373 if (maybe_ne (bitsize
[0], bitsize
[1]))
7375 if (offset
[0] != offset
[1])
7377 if (!offset
[0] || !offset
[1])
7379 if (!operand_equal_p (offset
[0], offset
[1], 0))
7382 for (int i
= 0; i
< 2; ++i
)
7384 step
[i
] = integer_one_node
;
7385 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7387 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7388 if (is_gimple_assign (def_stmt
)
7389 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
7390 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
7393 step
[i
] = gimple_assign_rhs2 (def_stmt
);
7394 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
7397 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
7399 step
[i
] = TREE_OPERAND (offset
[i
], 1);
7400 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
7402 tree rhs1
= NULL_TREE
;
7403 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7405 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7406 if (gimple_assign_cast_p (def_stmt
))
7407 rhs1
= gimple_assign_rhs1 (def_stmt
);
7409 else if (CONVERT_EXPR_P (offset
[i
]))
7410 rhs1
= TREE_OPERAND (offset
[i
], 0);
7412 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
7413 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
7414 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
7415 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
7418 if (!operand_equal_p (offset
[0], offset
[1], 0)
7419 || !operand_equal_p (step
[0], step
[1], 0))
7427 enum scan_store_kind
{
7428 /* Normal permutation. */
7429 scan_store_kind_perm
,
7431 /* Whole vector left shift permutation with zero init. */
7432 scan_store_kind_lshift_zero
,
7434 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7435 scan_store_kind_lshift_cond
7438 /* Function check_scan_store.
7440 Verify if we can perform the needed permutations or whole vector shifts.
7441 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7442 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7443 to do at each step. */
7446 scan_store_can_perm_p (tree vectype
, tree init
,
7447 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
7449 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7450 unsigned HOST_WIDE_INT nunits
;
7451 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7453 int units_log2
= exact_log2 (nunits
);
7454 if (units_log2
<= 0)
7458 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
7459 for (i
= 0; i
<= units_log2
; ++i
)
7461 unsigned HOST_WIDE_INT j
, k
;
7462 enum scan_store_kind kind
= scan_store_kind_perm
;
7463 vec_perm_builder
sel (nunits
, nunits
, 1);
7464 sel
.quick_grow (nunits
);
7465 if (i
== units_log2
)
7467 for (j
= 0; j
< nunits
; ++j
)
7468 sel
[j
] = nunits
- 1;
7472 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7474 for (k
= 0; j
< nunits
; ++j
, ++k
)
7475 sel
[j
] = nunits
+ k
;
7477 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7478 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
7480 if (i
== units_log2
)
7483 if (whole_vector_shift_kind
== scan_store_kind_perm
)
7485 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
7487 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
7488 /* Whole vector shifts shift in zeros, so if init is all zero
7489 constant, there is no need to do anything further. */
7490 if ((TREE_CODE (init
) != INTEGER_CST
7491 && TREE_CODE (init
) != REAL_CST
)
7492 || !initializer_zerop (init
))
7494 tree masktype
= truth_type_for (vectype
);
7495 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
7497 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
7500 kind
= whole_vector_shift_kind
;
7502 if (use_whole_vector
)
7504 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
7505 use_whole_vector
->safe_grow_cleared (i
, true);
7506 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7507 use_whole_vector
->safe_push (kind
);
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
7520 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7521 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7522 vect_memory_access_type memory_access_type
)
7524 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7525 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7528 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7531 || memory_access_type
!= VMAT_CONTIGUOUS
7532 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7533 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7534 || loop_vinfo
== NULL
7535 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7536 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7537 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7538 || !integer_zerop (DR_INIT (dr_info
->dr
))
7539 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7540 || !alias_sets_conflict_p (get_alias_set (vectype
),
7541 get_alias_set (TREE_TYPE (ref_type
))))
7543 if (dump_enabled_p ())
7544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7545 "unsupported OpenMP scan store.\n");
/* We need to pattern match code built by OpenMP lowering and simplified
   by following optimizations into something we can handle.
   #pragma omp simd reduction(inscan,+:r)
   #pragma omp scan inclusive (r)
   shall have body with:
     // Initialization for input phase, store the reduction initializer:
     _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
     _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
     // Actual input phase:
     r.0_5 = D.2042[_20];
     // Initialization for scan phase:
     _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
     // Actual scan phase:
     r.1_8 = D.2042[_20];
   The "omp simd array" variable D.2042 holds the privatized copy used
   inside of the loop and D.2043 is another one that holds copies of
   the current original list item.  The separate GOMP_SIMD_LANE ifn
   kinds are there in order to allow optimizing the initializer store
   and combiner sequence, e.g. if it is originally some C++ish user
   defined reduction, but allow the vectorizer to pattern recognize it
   and turn into the appropriate vectorized scan.

   For exclusive scan, this is slightly different:
   #pragma omp simd reduction(inscan,+:r)
   #pragma omp scan exclusive (r)
   shall have body with:
     // Initialization for input phase, store the reduction initializer:
     _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
     _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
     // Actual input phase:
     r.0_5 = D.2042[_20];
     // Initialization for scan phase:
     _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
     // Actual scan phase:
     r.1_8 = D.2044[_20];
7617 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7619 /* Match the D.2042[_21] = 0; store above. Just require that
7620 it is a constant or external definition store. */
7621 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7624 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7626 "unsupported OpenMP scan initializer store.\n");
7630 if (! loop_vinfo
->scan_map
)
7631 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7632 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7633 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7636 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7638 /* These stores can be vectorized normally. */
7642 if (rhs_dt
!= vect_internal_def
)
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7647 "unsupported OpenMP scan combiner pattern.\n");
7651 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7652 tree rhs
= gimple_assign_rhs1 (stmt
);
7653 if (TREE_CODE (rhs
) != SSA_NAME
)
7656 gimple
*other_store_stmt
= NULL
;
7657 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7658 bool inscan_var_store
7659 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7661 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7663 if (!inscan_var_store)
7665 use_operand_p use_p;
7666 imm_use_iterator iter;
7667 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7669 gimple *use_stmt = USE_STMT (use_p);
7670 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7672 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7673 || !is_gimple_assign (use_stmt)
7674 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7676 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7678 other_store_stmt = use_stmt;
7680 if (other_store_stmt == NULL)
7682 rhs = gimple_assign_lhs (other_store_stmt);
7683 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7687 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7689 use_operand_p use_p;
7690 imm_use_iterator iter;
7691 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7693 gimple *use_stmt = USE_STMT (use_p);
7694 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7696 if (other_store_stmt)
7698 other_store_stmt = use_stmt;
7704 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7705 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7706 || !is_gimple_assign (def_stmt)
7707 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7710 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7711 /* For pointer addition, we should use the normal plus for the vector
7715 case POINTER_PLUS_EXPR:
7718 case MULT_HIGHPART_EXPR:
7723 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7726 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7727 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7728 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7731 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7732 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7733 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7734 || !gimple_assign_load_p (load1_stmt)
7735 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7736 || !gimple_assign_load_p (load2_stmt))
7739 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7740 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7741 if (load1_stmt_info
== NULL
7742 || load2_stmt_info
== NULL
7743 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7744 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7745 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7746 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7749 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7751 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7752 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7753 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7755 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7757 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7761 use_operand_p use_p
;
7762 imm_use_iterator iter
;
7763 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7765 gimple
*use_stmt
= USE_STMT (use_p
);
7766 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7768 if (other_store_stmt
)
7770 other_store_stmt
= use_stmt
;
7774 if (other_store_stmt
== NULL
)
7776 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7777 || !gimple_store_p (other_store_stmt
))
7780 stmt_vec_info other_store_stmt_info
7781 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7782 if (other_store_stmt_info
== NULL
7783 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7784 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7787 gimple
*stmt1
= stmt
;
7788 gimple
*stmt2
= other_store_stmt
;
7789 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7790 std::swap (stmt1
, stmt2
);
7791 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7792 gimple_assign_rhs1 (load2_stmt
)))
7794 std::swap (rhs1
, rhs2
);
7795 std::swap (load1_stmt
, load2_stmt
);
7796 std::swap (load1_stmt_info
, load2_stmt_info
);
7798 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7799 gimple_assign_rhs1 (load1_stmt
)))
7802 tree var3
= NULL_TREE
;
7803 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7804 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7805 gimple_assign_rhs1 (load2_stmt
)))
7807 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7809 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7810 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7811 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7813 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7814 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7815 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7816 || lookup_attribute ("omp simd inscan exclusive",
7817 DECL_ATTRIBUTES (var3
)))
7821 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7822 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7823 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7826 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7827 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7828 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7829 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7830 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7831 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7834 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7835 std::swap (var1
, var2
);
7837 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7839 if (!lookup_attribute ("omp simd inscan exclusive",
7840 DECL_ATTRIBUTES (var1
)))
7845 if (loop_vinfo
->scan_map
== NULL
)
7847 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7851 /* The IL is as expected, now check if we can actually vectorize it.
7858 should be vectorized as (where _40 is the vectorized rhs
7859 from the D.2042[_21] = 0; store):
7860 _30 = MEM <vector(8) int> [(int *)&D.2043];
7861 _31 = MEM <vector(8) int> [(int *)&D.2042];
7862 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7864 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7865 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7867 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7868 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7869 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7871 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7872 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7874 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7875 MEM <vector(8) int> [(int *)&D.2043] = _39;
7876 MEM <vector(8) int> [(int *)&D.2042] = _38;
7883 should be vectorized as (where _40 is the vectorized rhs
7884 from the D.2042[_21] = 0; store):
7885 _30 = MEM <vector(8) int> [(int *)&D.2043];
7886 _31 = MEM <vector(8) int> [(int *)&D.2042];
7887 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7888 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7890 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7891 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7892 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7894 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7895 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7896 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7898 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7899 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7902 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7903 MEM <vector(8) int> [(int *)&D.2044] = _39;
7904 MEM <vector(8) int> [(int *)&D.2042] = _51; */
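/* Illustrative sketch (not part of the transform; assumes an 8-element
   int vector v[] holding the combiner inputs): the scalar equivalent of
   the log2 (nunits) shift-and-combine sequence shown above is

     for (int step = 1; step < 8; step <<= 1)
       for (int k = 7; k >= step; --k)
         v[k] += v[k - step];   // add the lane STEP positions to the left

   after which v[k] holds v0 + ... + vk, i.e. the inclusive prefix sum
   that the VEC_PERM_EXPR/plus pairs above compute; the final
   { 7, 7, ... } permutation broadcasts the total.  */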
7905 enum machine_mode vec_mode = TYPE_MODE (vectype);
7906 optab optab = optab_for_tree_code (code, vectype, optab_default);
7907 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7910 int units_log2 = scan_store_can_perm_p (vectype, *init);
7911 if (units_log2 == -1)
7918 /* Function vectorizable_scan_store.
7920 Helper of vectorizable_store; arguments as for vectorizable_store.
7921 Handle only the transformation, checking is done in check_scan_store. */
7924 vectorizable_scan_store (vec_info *vinfo,
7925 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7926 gimple **vec_stmt, int ncopies)
7928 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7929 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7930 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7933 if (dump_enabled_p ())
7934 dump_printf_loc (MSG_NOTE, vect_location,
7935 "transform scan store. ncopies = %d\n", ncopies);
7937 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7938 tree rhs = gimple_assign_rhs1 (stmt);
7939 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7941 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7942 bool inscan_var_store
7943 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7945 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7947 use_operand_p use_p
;
7948 imm_use_iterator iter
;
7949 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7951 gimple
*use_stmt
= USE_STMT (use_p
);
7952 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7954 rhs
= gimple_assign_lhs (use_stmt
);
7959 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7960 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7961 if (code
== POINTER_PLUS_EXPR
)
7963 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7964 && commutative_tree_code (code
));
7965 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7966 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7967 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7968 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7969 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7970 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7971 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7972 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7973 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7974 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7975 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7977 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7979 std::swap (rhs1
, rhs2
);
7980 std::swap (var1
, var2
);
7981 std::swap (load1_dr_info
, load2_dr_info
);
7984 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7987 unsigned HOST_WIDE_INT nunits;
7988 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7990 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7991 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7992 gcc_assert (units_log2 > 0);
7993 auto_vec<tree, 16> perms;
7994 perms.quick_grow (units_log2 + 1);
7995 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7996 for (int i = 0; i <= units_log2; ++i)
7998 unsigned HOST_WIDE_INT j, k;
7999 vec_perm_builder sel (nunits, nunits, 1);
8000 sel.quick_grow (nunits);
8001 if (i == units_log2)
8002 for (j = 0; j < nunits; ++j)
8003 sel[j] = nunits - 1;
8006 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
8008 for (k = 0; j < nunits; ++j, ++k)
8009 sel[j] = nunits + k;
8011 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
8012 if (!use_whole_vector.is_empty ()
8013 && use_whole_vector[i] != scan_store_kind_perm)
8015 if (zero_vec == NULL_TREE)
8016 zero_vec = build_zero_cst (vectype);
8017 if (masktype == NULL_TREE
8018 && use_whole_vector[i] == scan_store_kind_lshift_cond)
8019 masktype = truth_type_for (vectype);
8020 perms[i] = vect_gen_perm_mask_any (vectype, indices);
8023 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8026 tree vec_oprnd1
= NULL_TREE
;
8027 tree vec_oprnd2
= NULL_TREE
;
8028 tree vec_oprnd3
= NULL_TREE
;
8029 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
8030 tree dataref_offset
= build_int_cst (ref_type
, 0);
8031 tree bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
,
8032 vectype
, VMAT_CONTIGUOUS
);
8033 tree ldataref_ptr
= NULL_TREE
;
8034 tree orig
= NULL_TREE
;
8035 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
8036 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
8037 auto_vec
<tree
> vec_oprnds1
;
8038 auto_vec
<tree
> vec_oprnds2
;
8039 auto_vec
<tree
> vec_oprnds3
;
8040 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
8041 *init
, &vec_oprnds1
,
8042 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
8043 rhs2
, &vec_oprnds3
);
8044 for (int j
= 0; j
< ncopies
; j
++)
8046 vec_oprnd1
= vec_oprnds1
[j
];
8047 if (ldataref_ptr
== NULL
)
8048 vec_oprnd2
= vec_oprnds2
[j
];
8049 vec_oprnd3
= vec_oprnds3
[j
];
8052 else if (!inscan_var_store
)
8053 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8057 vec_oprnd2
= make_ssa_name (vectype
);
8058 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8059 unshare_expr (ldataref_ptr
),
8061 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
8062 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
8063 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8064 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8065 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8068 tree v
= vec_oprnd2
;
8069 for (int i
= 0; i
< units_log2
; ++i
)
8071 tree new_temp
= make_ssa_name (vectype
);
8072 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
8074 && (use_whole_vector
[i
]
8075 != scan_store_kind_perm
))
8076 ? zero_vec
: vec_oprnd1
, v
,
8078 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8079 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8080 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8082 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
8084 /* The whole-vector shift shifted in zero bits, but if *init
8085 is not initializer_zerop, we need to replace those elements
8086 with elements from vec_oprnd1. */
8087 tree_vector_builder
vb (masktype
, nunits
, 1);
8088 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
8089 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
8090 ? boolean_false_node
: boolean_true_node
);
8092 tree new_temp2
= make_ssa_name (vectype
);
8093 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
8094 new_temp
, vec_oprnd1
);
8095 vect_finish_stmt_generation (vinfo
, stmt_info
,
8097 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8098 new_temp
= new_temp2
;
8101 /* For exclusive scan, perform the perms[i] permutation once
8104 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
8112 tree new_temp2
= make_ssa_name (vectype
);
8113 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
8114 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8115 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8120 tree new_temp
= make_ssa_name (vectype
);
8121 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
8122 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8123 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8125 tree last_perm_arg
= new_temp
;
8126 /* For exclusive scan, new_temp computed above is the exclusive scan
8127 prefix sum. Turn it into inclusive prefix sum for the broadcast
8128 of the last element into orig. */
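/* Illustrative note (assuming a PLUS_EXPR combiner and a 4-lane vector):
   with inputs { a0, a1, a2, a3 } the exclusive result in NEW_TEMP is
   { init, init+a0, init+a0+a1, init+a0+a1+a2 }; adding VEC_OPRND2
   lane-wise yields the inclusive sums, whose last lane is the total
   that is broadcast below.  */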
8129 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
8131 last_perm_arg
= make_ssa_name (vectype
);
8132 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
8133 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8134 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8137 orig
= make_ssa_name (vectype
);
8138 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
8139 last_perm_arg
, perms
[units_log2
]);
8140 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8141 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8143 if (!inscan_var_store
)
8145 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8146 unshare_expr (dataref_ptr
),
8148 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8149 g
= gimple_build_assign (data_ref
, new_temp
);
8150 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8151 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8155 if (inscan_var_store
)
8156 for (int j
= 0; j
< ncopies
; j
++)
8159 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8161 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8162 unshare_expr (dataref_ptr
),
8164 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8165 gimple
*g
= gimple_build_assign (data_ref
, orig
);
8166 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8167 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8173 /* Function vectorizable_store.
8175 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8176 that can be vectorized.
8177 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8178 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8179 Return true if STMT_INFO is vectorizable in this way. */
8182 vectorizable_store (vec_info
*vinfo
,
8183 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8184 gimple
**vec_stmt
, slp_tree slp_node
,
8185 stmt_vector_for_cost
*cost_vec
)
8189 tree vec_oprnd
= NULL_TREE
;
8191 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8192 class loop
*loop
= NULL
;
8193 machine_mode vec_mode
;
8195 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
8196 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8197 tree dataref_ptr
= NULL_TREE
;
8198 tree dataref_offset
= NULL_TREE
;
8199 gimple
*ptr_incr
= NULL
;
8202 stmt_vec_info first_stmt_info
;
8204 unsigned int group_size
, i
;
8205 bool slp
= (slp_node
!= NULL
);
8206 unsigned int vec_num
;
8207 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8209 gather_scatter_info gs_info
;
8211 vec_load_store_type vls_type
;
8214 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8217 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8221 /* Is vectorizable store? */
8223 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8224 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8226 tree scalar_dest
= gimple_assign_lhs (assign
);
8227 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8228 && is_pattern_stmt_p (stmt_info
))
8229 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8230 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8231 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8232 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8233 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8234 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8235 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8236 && TREE_CODE (scalar_dest
) != MEM_REF
)
8241 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8242 if (!call
|| !gimple_call_internal_p (call
))
8245 internal_fn ifn
= gimple_call_internal_fn (call
);
8246 if (!internal_store_fn_p (ifn
))
8249 int mask_index
= internal_fn_mask_index (ifn
);
8250 if (mask_index
>= 0 && slp_node
)
8251 mask_index
= vect_slp_child_index_for_operand (call
, mask_index
);
8253 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8254 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8258 op
= vect_get_store_rhs (stmt_info
);
8260 /* Cannot have hybrid store SLP -- that would mean storing to the
8261 same location twice. */
8262 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8264 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8265 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8269 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8270 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8275 /* Multiple types in SLP are handled by creating the appropriate number of
8276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8281 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8283 gcc_assert (ncopies
>= 1);
8285 /* FORNOW. This restriction should be relaxed. */
8286 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
8288 if (dump_enabled_p ())
8289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8290 "multiple types in nested loop.\n");
8294 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8295 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8298 elem_type
= TREE_TYPE (vectype
);
8299 vec_mode
= TYPE_MODE (vectype
);
8301 if (!STMT_VINFO_DATA_REF (stmt_info
))
8304 vect_memory_access_type memory_access_type
;
8305 enum dr_alignment_support alignment_support_scheme
;
8308 internal_fn lanes_ifn
;
8309 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8310 ncopies
, &memory_access_type
, &poffset
,
8311 &alignment_support_scheme
, &misalignment
, &gs_info
,
8317 if (memory_access_type
== VMAT_CONTIGUOUS
)
8319 if (!VECTOR_MODE_P (vec_mode
)
8320 || !can_vec_mask_load_store_p (vec_mode
,
8321 TYPE_MODE (mask_vectype
), false))
8324 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8325 && (memory_access_type
!= VMAT_GATHER_SCATTER
8326 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8328 if (dump_enabled_p ())
8329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8330 "unsupported access type for masked store.\n");
8333 else if (memory_access_type
== VMAT_GATHER_SCATTER
8334 && gs_info
.ifn
== IFN_LAST
8337 if (dump_enabled_p ())
8338 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8339 "unsupported masked emulated scatter.\n");
8345 /* FORNOW. In some cases we can vectorize even if the data type is not
8346 supported (e.g. array initialization with 0). */
8347 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8351 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8352 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8353 && memory_access_type
!= VMAT_GATHER_SCATTER
8354 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8357 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8358 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8359 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8363 first_stmt_info
= stmt_info
;
8364 first_dr_info
= dr_info
;
8365 group_size
= vec_num
= 1;
8368 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8370 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8371 memory_access_type
))
8375 if (!vec_stmt
) /* transformation not required. */
8377 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8380 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8381 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8382 vls_type
, group_size
,
8383 memory_access_type
, &gs_info
,
8387 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8390 if (dump_enabled_p ())
8391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8392 "incompatible vector types for invariants\n");
8396 if (dump_enabled_p ()
8397 && memory_access_type
!= VMAT_ELEMENTWISE
8398 && memory_access_type
!= VMAT_GATHER_SCATTER
8399 && alignment_support_scheme
!= dr_aligned
)
8400 dump_printf_loc (MSG_NOTE
, vect_location
,
8401 "Vectorizing an unaligned access.\n");
8403 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
8404 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
8405 memory_access_type
, &gs_info
,
8406 alignment_support_scheme
,
8407 misalignment
, vls_type
, slp_node
, cost_vec
);
8410 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8414 ensure_base_align (dr_info
);
8416 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8418 vect_build_scatter_store_calls (vinfo
, stmt_info
, gsi
, vec_stmt
,
8422 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8423 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8428 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8432 grouped_store
= false;
8433 /* VEC_NUM is the number of vect stmts to be created for this
8435 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8436 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8437 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8438 == first_stmt_info
);
8439 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8440 op
= vect_get_store_rhs (first_stmt_info
);
8443 /* VEC_NUM is the number of vect stmts to be created for this
8445 vec_num
= group_size
;
8447 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8450 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8452 if (dump_enabled_p ())
8453 dump_printf_loc (MSG_NOTE
, vect_location
,
8454 "transform store. ncopies = %d\n", ncopies
);
8456 if (memory_access_type
== VMAT_ELEMENTWISE
8457 || memory_access_type
== VMAT_STRIDED_SLP
)
8459 gimple_stmt_iterator incr_gsi
;
8465 tree stride_base
, stride_step
, alias_off
;
8469 /* Checked by get_load_store_type. */
8470 unsigned int const_nunits
= nunits
.to_constant ();
8472 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8473 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8475 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8477 = fold_build_pointer_plus
8478 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8479 size_binop (PLUS_EXPR
,
8480 convert_to_ptrofftype (dr_offset
),
8481 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8482 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8484 /* For a store with loop-invariant (but other than power-of-2)
8485 stride (i.e. not a grouped access) like so:
8487 for (i = 0; i < n; i += stride)
8490 we generate a new induction variable and new stores from
8491 the components of the (vectorized) rhs:
8493 for (j = 0; ; j += VF*stride)
8498 array[j + stride] = tmp2;
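/* Illustrative sketch (hypothetical source loop; a VF of 4 is assumed):
   a store with loop-invariant stride 3, say

     for (i = 0; i < n; i += 3)
       array[i] = <value>;

   is emitted, conceptually, as

     for (j = 0; ; j += 4 * 3)
       {
         array[j + 0] = tmp[0];
         array[j + 3] = tmp[1];
         array[j + 6] = tmp[2];
         array[j + 9] = tmp[3];
       }

   one scalar store per lane of the vectorized rhs TMP, addressed through
   the new induction variable stepping by VF * stride.  */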
8502 unsigned nstores
= const_nunits
;
8504 tree ltype
= elem_type
;
8505 tree lvectype
= vectype
;
8508 if (group_size
< const_nunits
8509 && const_nunits
% group_size
== 0)
8511 nstores
= const_nunits
/ group_size
;
8513 ltype
= build_vector_type (elem_type
, group_size
);
8516 /* First check if vec_extract optab doesn't support extraction
8517 of vector elts directly. */
8518 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8520 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8521 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8522 group_size
).exists (&vmode
)
8523 || (convert_optab_handler (vec_extract_optab
,
8524 TYPE_MODE (vectype
), vmode
)
8525 == CODE_FOR_nothing
))
8527 /* Try to avoid emitting an extract of vector elements
8528 by performing the extracts using an integer type of the
8529 same size, extracting from a vector of those and then
8530 re-interpreting it as the original vector type if
8533 = group_size
* GET_MODE_BITSIZE (elmode
);
8534 unsigned int lnunits
= const_nunits
/ group_size
;
8535 /* If we can't construct such a vector fall back to
8536 element extracts from the original vector type and
8537 element size stores. */
8538 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8539 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8540 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8541 lnunits
).exists (&vmode
)
8542 && (convert_optab_handler (vec_extract_optab
,
8544 != CODE_FOR_nothing
))
8548 ltype
= build_nonstandard_integer_type (lsize
, 1);
8549 lvectype
= build_vector_type (ltype
, nstores
);
8551 /* Else fall back to vector extraction anyway.
8552 Fewer stores are more important than avoiding spilling
8553 of the vector we extract from. Compared to the
8554 construction case in vectorizable_load no store-forwarding
8555 issue exists here for reasonable archs. */
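/* Illustrative note (assuming a V8SI vector and group_size == 2, so
   nstores == 4): instead of extracting eight SImode lanes and storing
   them in pairs, the vector is punned to V4DI

     vectype  = V8SI  { s0, s1, s2, s3, s4, s5, s6, s7 }
     lvectype = V4DI  {   d0,     d1,     d2,     d3   }

   and each DImode extract stores one whole group (two ints) at once,
   halving the number of scalar stores.  */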
8558 else if (group_size
>= const_nunits
8559 && group_size
% const_nunits
== 0)
8562 lnel
= const_nunits
;
8566 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8567 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8570 ivstep
= stride_step
;
8571 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8572 build_int_cst (TREE_TYPE (ivstep
), vf
));
8574 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8576 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8577 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8578 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
8579 loop
, &incr_gsi
, insert_after
,
8581 incr
= gsi_stmt (incr_gsi
);
8583 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8585 alias_off
= build_int_cst (ref_type
, 0);
8586 stmt_vec_info next_stmt_info
= first_stmt_info
;
8587 auto_vec
<tree
> vec_oprnds (ncopies
);
8588 for (g
= 0; g
< group_size
; g
++)
8590 running_off
= offvar
;
8593 tree size
= TYPE_SIZE_UNIT (ltype
);
8594 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8596 tree newoff
= copy_ssa_name (running_off
, NULL
);
8597 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8599 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8600 running_off
= newoff
;
8603 op
= vect_get_store_rhs (next_stmt_info
);
8604 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8606 unsigned int group_el
= 0;
8607 unsigned HOST_WIDE_INT
8608 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8609 for (j
= 0; j
< ncopies
; j
++)
8611 vec_oprnd
= vec_oprnds
[j
];
8612 /* Pun the vector to extract from if necessary. */
8613 if (lvectype
!= vectype
)
8615 tree tem
= make_ssa_name (lvectype
);
8617 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8618 lvectype
, vec_oprnd
));
8619 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8622 for (i
= 0; i
< nstores
; i
++)
8624 tree newref
, newoff
;
8625 gimple
*incr
, *assign
;
8626 tree size
= TYPE_SIZE (ltype
);
8627 /* Extract the i'th component. */
8628 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8629 bitsize_int (i
), size
);
8630 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8633 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8637 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8639 newref
= build2 (MEM_REF
, ltype
,
8640 running_off
, this_off
);
8641 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8643 /* And store it to *running_off. */
8644 assign
= gimple_build_assign (newref
, elem
);
8645 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8649 || group_el
== group_size
)
8651 newoff
= copy_ssa_name (running_off
, NULL
);
8652 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8653 running_off
, stride_step
);
8654 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8656 running_off
= newoff
;
8659 if (g
== group_size
- 1
8662 if (j
== 0 && i
== 0)
8664 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8668 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8669 vec_oprnds
.truncate(0);
8677 gcc_assert (alignment_support_scheme
);
8678 vec_loop_masks
*loop_masks
8679 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8680 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8682 vec_loop_lens
*loop_lens
8683 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8684 ? &LOOP_VINFO_LENS (loop_vinfo
)
8687 /* We shouldn't use the length-based approach if the loop is fully masked. */
8688 gcc_assert (!loop_lens
|| !loop_masks
);
8690 /* Targets with store-lane instructions must not require explicit
8691 realignment. vect_supportable_dr_alignment always returns either
8692 dr_aligned or dr_unaligned_supported for masked operations. */
8693 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8696 || alignment_support_scheme
== dr_aligned
8697 || alignment_support_scheme
== dr_unaligned_supported
);
8699 tree offset
= NULL_TREE
;
8700 if (!known_eq (poffset
, 0))
8701 offset
= size_int (poffset
);
8704 tree vec_offset
= NULL_TREE
;
8705 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8707 aggr_type
= NULL_TREE
;
8710 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8712 aggr_type
= elem_type
;
8713 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8714 &bump
, &vec_offset
, loop_lens
);
8718 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8719 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8721 aggr_type
= vectype
;
8722 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8723 memory_access_type
, loop_lens
);
8727 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8729 /* In case the vectorization factor (VF) is bigger than the number
8730 of elements that we can fit in a vectype (nunits), we have to generate
8731 more than one vector stmt - i.e - we need to "unroll" the
8732 vector stmt by a factor VF/nunits. */
8734 /* In case of interleaving (non-unit grouped access):
8741 We create vectorized stores starting from base address (the access of the
8742 first stmt in the chain (S2 in the above example), when the last store stmt
8743 of the chain (S4) is reached:
8746 VS2: &base + vec_size*1 = vx0
8747 VS3: &base + vec_size*2 = vx1
8748 VS4: &base + vec_size*3 = vx3
8750 Then permutation statements are generated:
8752 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8753 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8756 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8757 (the order of the data-refs in the output of vect_permute_store_chain
8758 corresponds to the order of scalar stmts in the interleaving chain - see
8759 the documentation of vect_permute_store_chain()).
8761 In case of both multiple types and interleaving, above vector stores and
8762 permutation stmts are created for every copy. The result vector stmts are
8763 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8764 STMT_VINFO_RELATED_STMT for the next copies.
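/* Illustrative sketch (hypothetical source, group size 2, 8-lane vectors
   assumed): for the interleaved store group

     a[2*i]     = x[i];
     a[2*i + 1] = y[i];

   the vectorized defs vx = { x0, ..., x7 } and vy = { y0, ..., y7 } are
   interleaved as

     lo = VEC_PERM_EXPR <vx, vy, { 0, 8, 1, 9, 2, 10, 3, 11 }>;
     hi = VEC_PERM_EXPR <vx, vy, { 4, 12, 5, 13, 6, 14, 7, 15 }>;

   and lo/hi are stored at &a[2*j] and &a[2*j] + vec_size, producing
   { x0, y0, x1, y1, ... } in memory, i.e. the VS5/VS6 pattern described
   above.  */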
8767 auto_vec
<tree
> dr_chain (group_size
);
8768 auto_vec
<tree
> vec_masks
;
8769 tree vec_mask
= NULL
;
8770 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8771 for (i
= 0; i
< group_size
; i
++)
8772 gvec_oprnds
.quick_push (new auto_vec
<tree
> (ncopies
));
8774 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8776 gcc_assert (!slp
&& grouped_store
);
8777 for (j
= 0; j
< ncopies
; j
++)
8782 /* For interleaved stores we collect vectorized defs for all
8783 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8784 as an input to vect_permute_store_chain(). */
8785 stmt_vec_info next_stmt_info
= first_stmt_info
;
8786 for (i
= 0; i
< group_size
; i
++)
8788 /* Since gaps are not supported for interleaved stores,
8789 DR_GROUP_SIZE is the exact number of stmts in the
8790 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8791 op
= vect_get_store_rhs (next_stmt_info
);
8792 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
, ncopies
,
8793 op
, gvec_oprnds
[i
]);
8794 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8795 dr_chain
.quick_push (vec_oprnd
);
8796 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8800 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8803 vec_mask
= vec_masks
[0];
8806 /* We should have caught mismatched types earlier. */
8808 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
8810 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8811 NULL
, offset
, &dummy
, gsi
,
8812 &ptr_incr
, false, bump
);
8816 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8817 /* DR_CHAIN is then used as an input to
8818 vect_permute_store_chain(). */
8819 for (i
= 0; i
< group_size
; i
++)
8821 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8822 dr_chain
[i
] = vec_oprnd
;
8825 vec_mask
= vec_masks
[j
];
8826 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8830 /* Get an array into which we can store the individual vectors. */
8831 tree vec_array
= create_vector_array (vectype
, vec_num
);
8833 /* Invalidate the current contents of VEC_ARRAY. This should
8834 become an RTL clobber too, which prevents the vector registers
8835 from being upward-exposed. */
8836 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8838 /* Store the individual vectors into the array. */
8839 for (i
= 0; i
< vec_num
; i
++)
8841 vec_oprnd
= dr_chain
[i
];
8842 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8846 tree final_mask
= NULL
;
8847 tree final_len
= NULL
;
8850 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8851 ncopies
, vectype
, j
);
8853 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8856 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8859 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8860 ncopies
, vectype
, j
, 1);
8862 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8864 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8865 bias
= build_int_cst (intQI_type_node
, biasval
);
8868 mask_vectype
= truth_type_for (vectype
);
8869 final_mask
= build_minus_one_cst (mask_vectype
);
8874 if (final_len
&& final_mask
)
8877 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8878 LEN, BIAS, VEC_ARRAY). */
8879 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8880 tree alias_ptr
= build_int_cst (ref_type
, align
);
8881 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
8882 dataref_ptr
, alias_ptr
,
8883 final_mask
, final_len
, bias
,
8886 else if (final_mask
)
8889 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8891 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8892 tree alias_ptr
= build_int_cst (ref_type
, align
);
8893 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8894 dataref_ptr
, alias_ptr
,
8895 final_mask
, vec_array
);
8900 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8901 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8902 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
8903 gimple_call_set_lhs (call
, data_ref
);
8905 gimple_call_set_nothrow (call
, true);
8906 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8909 /* Record that VEC_ARRAY is now dead. */
8910 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8912 *vec_stmt
= new_stmt
;
8913 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8919 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8921 gcc_assert (!slp
&& !grouped_store
);
8922 auto_vec
<tree
> vec_offsets
;
8923 for (j
= 0; j
< ncopies
; j
++)
8928 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8929 DR_CHAIN is of size 1. */
8930 gcc_assert (group_size
== 1);
8931 op
= vect_get_store_rhs (first_stmt_info
);
8932 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
, ncopies
,
8933 op
, gvec_oprnds
[0]);
8934 vec_oprnd
= (*gvec_oprnds
[0])[0];
8935 dr_chain
.quick_push (vec_oprnd
);
8938 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8941 vec_mask
= vec_masks
[0];
8944 /* We should have caught mismatched types earlier. */
8945 gcc_assert (useless_type_conversion_p (vectype
,
8946 TREE_TYPE (vec_oprnd
)));
8947 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8948 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8949 slp_node
, &gs_info
, &dataref_ptr
,
8953 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8954 NULL
, offset
, &dummy
, gsi
,
8955 &ptr_incr
, false, bump
);
8959 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8960 vec_oprnd
= (*gvec_oprnds
[0])[j
];
8961 dr_chain
[0] = vec_oprnd
;
8963 vec_mask
= vec_masks
[j
];
8964 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8965 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8966 gsi
, stmt_info
, bump
);
8970 unsigned HOST_WIDE_INT align
;
8971 tree final_mask
= NULL_TREE
;
8972 tree final_len
= NULL_TREE
;
8973 tree bias
= NULL_TREE
;
8975 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8976 ncopies
, vectype
, j
);
8978 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8981 if (gs_info
.ifn
!= IFN_LAST
)
8983 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8984 vec_offset
= vec_offsets
[j
];
8985 tree scale
= size_int (gs_info
.scale
);
8987 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
8990 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8991 ncopies
, vectype
, j
, 1);
8993 final_len
= build_int_cst (sizetype
,
8994 TYPE_VECTOR_SUBPARTS (vectype
));
8996 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8997 bias
= build_int_cst (intQI_type_node
, biasval
);
9000 mask_vectype
= truth_type_for (vectype
);
9001 final_mask
= build_minus_one_cst (mask_vectype
);
9006 if (final_len
&& final_mask
)
9007 call
= gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE
,
9008 7, dataref_ptr
, vec_offset
,
9009 scale
, vec_oprnd
, final_mask
,
9011 else if (final_mask
)
9013 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
9014 dataref_ptr
, vec_offset
, scale
,
9015 vec_oprnd
, final_mask
);
9017 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9018 dataref_ptr
, vec_offset
,
9020 gimple_call_set_nothrow (call
, true);
9021 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9026 /* Emulated scatter. */
9027 gcc_assert (!final_mask
);
9028 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9029 unsigned HOST_WIDE_INT const_offset_nunits
9030 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9031 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9032 vec_alloc (ctor_elts
, const_nunits
);
9033 gimple_seq stmts
= NULL
;
9034 tree elt_type
= TREE_TYPE (vectype
);
9035 unsigned HOST_WIDE_INT elt_size
9036 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9037 /* We support offset vectors with more elements
9038 than the data vector for now. */
9039 unsigned HOST_WIDE_INT factor
9040 = const_offset_nunits
/ const_nunits
;
9041 vec_offset
= vec_offsets
[j
/ factor
];
9042 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9043 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9044 tree scale
= size_int (gs_info
.scale
);
9045 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9046 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9047 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9049 /* Compute the offsetted pointer. */
9050 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9051 bitsize_int (k
+ elt_offset
));
9053 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
, vec_offset
,
9054 TYPE_SIZE (idx_type
), boff
);
9055 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9056 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
, scale
);
9058 = gimple_build (&stmts
, PLUS_EXPR
, TREE_TYPE (dataref_ptr
),
9060 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9061 /* Extract the element to be stored. */
9063 = gimple_build (&stmts
, BIT_FIELD_REF
, TREE_TYPE (vectype
),
9064 vec_oprnd
, TYPE_SIZE (elt_type
),
9065 bitsize_int (k
* elt_size
));
9066 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9069 = build2 (MEM_REF
, ltype
, ptr
, build_int_cst (ref_type
, 0));
9070 new_stmt
= gimple_build_assign (ref
, elt
);
9071 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9075 *vec_stmt
= new_stmt
;
9076 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9081 auto_vec
<tree
> result_chain (group_size
);
9082 auto_vec
<tree
, 1> vec_oprnds
;
9083 for (j
= 0; j
< ncopies
; j
++)
9090 /* Get vectorized arguments for SLP_NODE. */
9091 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9092 &vec_oprnds
, mask
, &vec_masks
);
9093 vec_oprnd
= vec_oprnds
[0];
9095 vec_mask
= vec_masks
[0];
9099 /* For interleaved stores we collect vectorized defs for all the
9100 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9101 input to vect_permute_store_chain().
9103 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9105 stmt_vec_info next_stmt_info
= first_stmt_info
;
9106 for (i
= 0; i
< group_size
; i
++)
9108 /* Since gaps are not supported for interleaved stores,
9109 DR_GROUP_SIZE is the exact number of stmts in the chain.
9110 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9111 that there is no interleaving, DR_GROUP_SIZE is 1,
9112 and only one iteration of the loop will be executed. */
9113 op
= vect_get_store_rhs (next_stmt_info
);
9114 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
, ncopies
,
9115 op
, gvec_oprnds
[i
]);
9116 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9117 dr_chain
.quick_push (vec_oprnd
);
9118 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9122 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9125 vec_mask
= vec_masks
[0];
9129 /* We should have caught mismatched types earlier. */
9130 gcc_assert (useless_type_conversion_p (vectype
,
9131 TREE_TYPE (vec_oprnd
)));
9132 bool simd_lane_access_p
9133 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9134 if (simd_lane_access_p
9136 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9137 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9138 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9139 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9140 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9141 get_alias_set (TREE_TYPE (ref_type
))))
9143 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9144 dataref_offset
= build_int_cst (ref_type
, 0);
9148 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9149 simd_lane_access_p
? loop
: NULL
,
9150 offset
, &dummy
, gsi
, &ptr_incr
,
9151 simd_lane_access_p
, bump
);
9155 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9156 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9157 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9159 for (i
= 0; i
< group_size
; i
++)
9161 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9162 dr_chain
[i
] = vec_oprnd
;
9165 vec_mask
= vec_masks
[j
];
9167 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9169 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9176 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
, gsi
,
9179 stmt_vec_info next_stmt_info
= first_stmt_info
;
9180 for (i
= 0; i
< vec_num
; i
++)
9183 unsigned HOST_WIDE_INT align
;
9185 tree final_mask
= NULL_TREE
;
9186 tree final_len
= NULL_TREE
;
9187 tree bias
= NULL_TREE
;
9189 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9190 vec_num
* ncopies
, vectype
,
9192 if (slp
&& vec_mask
)
9193 vec_mask
= vec_masks
[i
];
9195 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9199 /* Bump the vector pointer. */
9200 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9204 vec_oprnd
= vec_oprnds
[i
];
9205 else if (grouped_store
)
9206 /* For grouped stores vectorized defs are interleaved in
9207 vect_permute_store_chain(). */
9208 vec_oprnd
= result_chain
[i
];
9210 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9211 if (alignment_support_scheme
== dr_aligned
)
9213 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9215 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9219 misalign
= misalignment
;
9220 if (dataref_offset
== NULL_TREE
9221 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9222 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9224 align
= least_bit_hwi (misalign
| align
);
9226 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9228 tree perm_mask
= perm_mask_for_reverse (vectype
);
9230 = vect_create_destination_var (vect_get_store_rhs (stmt_info
),
9232 tree new_temp
= make_ssa_name (perm_dest
);
9234 /* Generate the permute statement. */
9236 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9237 vec_oprnd
, perm_mask
);
9238 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
9240 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9241 vec_oprnd
= new_temp
;
9244 /* Compute IFN when LOOP_LENS or final_mask valid. */
9245 machine_mode vmode
= TYPE_MODE (vectype
);
9246 machine_mode new_vmode
= vmode
;
9247 internal_fn partial_ifn
= IFN_LAST
;
9250 opt_machine_mode new_ovmode
9251 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9252 new_vmode
= new_ovmode
.require ();
9254 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9255 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9256 vec_num
* ncopies
, vectype
,
9257 vec_num
* j
+ i
, factor
);
9259 else if (final_mask
)
9261 if (!can_vec_mask_load_store_p (
9262 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9267 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9271 /* Pass VF value to 'len' argument of
9272 MASK_LEN_STORE if LOOP_LENS is invalid. */
9273 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9277 /* Pass all ones value to 'mask' argument of
9278 MASK_LEN_STORE if final_mask is invalid. */
9279 mask_vectype
= truth_type_for (vectype
);
9280 final_mask
= build_minus_one_cst (mask_vectype
);
9286 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9288 bias
= build_int_cst (intQI_type_node
, biasval
);
9291 /* Arguments are ready. Create the new vector stmt. */
9295 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9296 /* Need conversion if it's wrapped with VnQI. */
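/* Illustrative note (assuming a V4SF store on a target whose len_store
   handler only covers byte vectors): get_len_load_store_mode above then
   yields V16QI, the operand is VIEW_CONVERTed to that mode here, and
   final_len was obtained with a GET_MODE_UNIT_SIZE factor so that the
   length is counted in bytes rather than lanes.  */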
9297 if (vmode
!= new_vmode
)
9300 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9302 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9303 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9305 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9306 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9310 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9311 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9312 dataref_ptr
, ptr
, final_mask
,
9313 final_len
, bias
, vec_oprnd
);
9315 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9316 dataref_ptr
, ptr
, final_len
,
9318 gimple_call_set_nothrow (call
, true);
9319 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9322 else if (final_mask
)
9324 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9326 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9327 ptr
, final_mask
, vec_oprnd
);
9328 gimple_call_set_nothrow (call
, true);
9329 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9335 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9336 dataref_offset
? dataref_offset
9337 : build_int_cst (ref_type
, 0));
9338 if (alignment_support_scheme
== dr_aligned
)
9341 TREE_TYPE (data_ref
)
9342 = build_aligned_type (TREE_TYPE (data_ref
),
9343 align
* BITS_PER_UNIT
);
9344 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9345 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9346 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9352 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9353 if (!next_stmt_info
)
9359 *vec_stmt
= new_stmt
;
9360 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9367 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9368 VECTOR_CST mask. No checks are made that the target platform supports the
9369 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9370 vect_gen_perm_mask_checked. */
9373 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9377 poly_uint64 nunits = sel.length ();
9378 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9380 mask_type = build_vector_type (ssizetype, nunits);
9381 return vec_perm_indices_to_tree (mask_type, sel);
9384 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9385 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9388 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9390 machine_mode vmode = TYPE_MODE (vectype);
9391 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9392 return vect_gen_perm_mask_any (vectype, sel);
9395 /* Given a vector variable X and Y, that was generated for the scalar
9396 STMT_INFO, generate instructions to permute the vector elements of X and Y
9397 using permutation mask MASK_VEC, insert them at *GSI and return the
9398 permuted vector variable. */
9401 permute_vec_elements (vec_info *vinfo,
9402 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9403 gimple_stmt_iterator *gsi)
9405 tree vectype = TREE_TYPE (x);
9406 tree perm_dest, data_ref;
9409 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9410 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9411 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9413 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9414 data_ref = make_ssa_name (perm_dest);
9416 /* Generate the permute statement. */
9417 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9418 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9423 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9424 inserting them on the loop's preheader edge. Returns true if we
9425 were successful in doing so (and thus STMT_INFO can be moved then),
9426 otherwise returns false. HOIST_P indicates if we want to hoist the
9427 definitions of all SSA uses, it would be false when we are costing. */
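/* Illustrative note (hypothetical IL): for a candidate invariant load

     # in the loop body
     addr_1 = &a + off_2;   // off_2 defined outside the loop
     x_3 = *addr_1;

   hoisting moves the definition of addr_1 to the preheader edge so the
   load itself can then be moved out of the loop; with HOIST_P false the
   function only reports whether this would be possible (used while
   costing).  */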
9430 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9436 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9438 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9439 if (!gimple_nop_p (def_stmt)
9440 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9442 /* Make sure we don't need to recurse. While we could do
9443 so in simple cases when there are more complex use webs
9444 we don't have an easy way to preserve stmt order to fulfil
9445 dependencies within them. */
9448 if (gimple_code (def_stmt) == GIMPLE_PHI)
9450 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9452 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9453 if (!gimple_nop_p (def_stmt2)
9454 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9467 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9469 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9470 if (!gimple_nop_p (def_stmt)
9471 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9473 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9474 gsi_remove (&gsi, false);
9475 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9482 /* vectorizable_load.
9484 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9485 that can be vectorized.
9486 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9487 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9488 Return true if STMT_INFO is vectorizable in this way. */
9491 vectorizable_load (vec_info
*vinfo
,
9492 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9493 gimple
**vec_stmt
, slp_tree slp_node
,
9494 stmt_vector_for_cost
*cost_vec
)
9497 tree vec_dest
= NULL
;
9498 tree data_ref
= NULL
;
9499 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9500 class loop
*loop
= NULL
;
9501 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9502 bool nested_in_vect_loop
= false;
9504 /* Avoid false positive uninitialized warning, see PR110652. */
9505 tree new_temp
= NULL_TREE
;
9508 tree dataref_ptr
= NULL_TREE
;
9509 tree dataref_offset
= NULL_TREE
;
9510 gimple
*ptr_incr
= NULL
;
9513 unsigned int group_size
;
9514 poly_uint64 group_gap_adj
;
9515 tree msq
= NULL_TREE
, lsq
;
9516 tree realignment_token
= NULL_TREE
;
9518 vec
<tree
> dr_chain
= vNULL
;
9519 bool grouped_load
= false;
9520 stmt_vec_info first_stmt_info
;
9521 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9522 bool compute_in_loop
= false;
9523 class loop
*at_loop
;
9525 bool slp
= (slp_node
!= NULL
);
9526 bool slp_perm
= false;
9527 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9530 gather_scatter_info gs_info
;
9532 enum vect_def_type mask_dt
= vect_unknown_def_type
;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  int mask_index = -1;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
	return false;

      tree_code code = gimple_assign_rhs_code (assign);
      if (code != ARRAY_REF
	  && code != BIT_FIELD_REF
	  && code != INDIRECT_REF
	  && code != COMPONENT_REF
	  && code != IMAGPART_EXPR
	  && code != REALPART_EXPR
	  && code != MEM_REF
	  && TREE_CODE_CLASS (code) != tcc_declaration)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
	return false;

      scalar_dest = gimple_call_lhs (call);

      mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0 && slp_node)
	mask_index = vect_slp_child_index_for_operand (call, mask_index);
      if (mask_index >= 0
	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
				      &mask, NULL, &mask_dt, &mask_vectype))
	return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);

      /* Refuse non-SLP vectorization of SLP-only groups.  */
      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot vectorize load in non-SLP mode.\n");
	  return false;
	}

      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot perform implicit CSE when performing "
			     "group loads with negative dependence distance\n");
	  return false;
	}
    }
  else
    group_size = 1;
  if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      slp_perm = true;

      if (!loop_vinfo)
	{
	  /* In BB vectorization we may not actually use a loaded vector
	     accessing elements in excess of DR_GROUP_SIZE.  */
	  stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	  unsigned HOST_WIDE_INT nunits;
	  unsigned j, k, maxk = 0;
	  FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
	    if (k > maxk)
	      maxk = k;
	  tree vectype = SLP_TREE_VECTYPE (slp_node);
	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
	      || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "BB vectorization with gaps at the end of "
				 "a load is not supported\n");
	      return false;
	    }
	}

      auto_vec<tree> tem;
      unsigned n_perms;
      if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
					 true, &n_perms))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
			     vect_location,
			     "unsupported load permutation\n");
	  return false;
	}
    }
  vect_memory_access_type memory_access_type;
  enum dr_alignment_support alignment_support_scheme;
  int misalignment;
  poly_int64 poffset;
  internal_fn lanes_ifn;
  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
			    ncopies, &memory_access_type, &poffset,
			    &alignment_support_scheme, &misalignment, &gs_info,
			    &lanes_ifn))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), true))
	    return false;
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && memory_access_type != VMAT_GATHER_SCATTER)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked load.\n");
	  return false;
	}
      else if (memory_access_type == VMAT_GATHER_SCATTER
	       && gs_info.ifn == IFN_LAST
	       && !gs_info.decl)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported masked emulated gather.\n");
	  return false;
	}
    }
  bool costing_p = !vec_stmt;

  if (costing_p) /* transformation not required.  */
    {
      if (slp_node
	  && mask
	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
						mask_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      if (!slp)
	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
					      VLS_LOAD, group_size,
					      memory_access_type, &gs_info,
					      mask);

      if (dump_enabled_p ()
	  && memory_access_type != VMAT_ELEMENTWISE
	  && memory_access_type != VMAT_GATHER_SCATTER
	  && alignment_support_scheme != dr_aligned)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Vectorizing an unaligned access.\n");

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	vinfo->any_known_not_updated_vssa = true;

      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
    }

  if (!slp)
    gcc_assert (memory_access_type
		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p () && !costing_p)
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load. ncopies = %d\n", ncopies);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
  ensure_base_align (dr_info);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
				    mask, cost_vec);
      return true;
    }

  if (memory_access_type == VMAT_INVARIANT)
    {
      gcc_assert (!grouped_load && !mask && !bb_vinfo);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.
	 TODO: hoist_defs_of_uses should ideally be computed
	 once at analysis time, remembered and used in the
	 transform.  */
      bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt_info, loop, !costing_p));

      if (costing_p)
	{
	  enum vect_cost_model_location cost_loc
	    = hoist_p ? vect_prologue : vect_body;
	  unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
						stmt_info, 0, cost_loc);
	  cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
				    cost_loc);
	  unsigned int prologue_cost = hoist_p ? cost : 0;
	  unsigned int inside_cost = hoist_p ? 0 : cost;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: inside_cost = %d, "
			     "prologue_cost = %d .\n",
			     inside_cost, prologue_cost);
	  return true;
	}
      if (hoist_p)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "hoisting out of the vectorized loop: %G",
			     stmt);
	  scalar_dest = copy_ssa_name (scalar_dest);
	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
	  edge pe = loop_preheader_edge (loop);
	  gphi *vphi = get_virtual_phi (loop->header);
	  tree vuse;
	  if (vphi)
	    vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
	  else
	    vuse = gimple_vuse (gsi_stmt (*gsi));
	  gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
	  gimple_set_vuse (new_stmt, vuse);
	  gsi_insert_on_edge_immediate (pe, new_stmt);
	}

      /* These copies are all equivalent.  */
      if (hoist_p)
	new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
				     vectype, NULL);
      else
	{
	  gimple_stmt_iterator gsi2 = *gsi;
	  gsi_next (&gsi2);
	  new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
				       vectype, &gsi2);
	}
      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
      if (slp)
	{
	  for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
	    slp_node->push_vec_def (new_stmt);
	}
      else
	{
	  for (j = 0; j < ncopies; ++j)
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	  *vec_stmt = new_stmt;
	}
      return true;
    }
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      tree stride_base, stride_step, alias_off;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
      unsigned HOST_WIDE_INT cst_offset = 0;
      tree dr_offset;
      unsigned int inside_cost = 0;

      gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop);

      if (grouped_load)
	{
	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
	}
      else
	{
	  first_stmt_info = stmt_info;
	  first_dr_info = dr_info;
	}

      if (slp && grouped_load)
	{
	  group_size = DR_GROUP_SIZE (first_stmt_info);
	  ref_type = get_group_alias_ptr_type (first_stmt_info);
	}
      else
	{
	  if (grouped_load)
	    cst_offset
	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
		 * vect_get_place_in_interleaving_chain (stmt_info,
							 first_stmt_info));
	  group_size = 1;
	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
	}

      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
      stride_base = fold_build_pointer_plus (
	DR_BASE_ADDRESS (first_dr_info->dr),
	size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
		    convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));

      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}  */
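      /* Illustrative instance of the transformation above (the stride, VF
	 and temporary names are assumptions for the example only): with
	 VF = 4 and an invariant stride of 3,

	   for (i = 0; i < n; i += 3)
	     ... = array[i];

	 becomes, conceptually,

	   for (j = 0; ; j += 4*3)
	     {
	       tmp1 = array[j];
	       tmp2 = array[j + 3];
	       tmp3 = array[j + 6];
	       tmp4 = array[j + 9];
	       vectemp = {tmp1, tmp2, tmp3, tmp4};
	       ...
	     }  */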
9964 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9965 build_int_cst (TREE_TYPE (stride_step
), vf
));
9967 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9969 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9970 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9971 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
9972 loop
, &incr_gsi
, insert_after
,
9975 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9978 running_off
= offvar
;
9979 alias_off
= build_int_cst (ref_type
, 0);
9980 int nloads
= const_nunits
;
9982 tree ltype
= TREE_TYPE (vectype
);
9983 tree lvectype
= vectype
;
9984 auto_vec
<tree
> dr_chain
;
9985 if (memory_access_type
== VMAT_STRIDED_SLP
)
9987 if (group_size
< const_nunits
)
	      /* First check if vec_init optab supports construction from vector
		 elts directly.  Otherwise avoid emitting a constructor of
		 vector elements by performing the loads using an integer type
		 of the same size, constructing a vector of those and then
		 re-interpreting it as the original vector type.  This avoids a
		 huge runtime penalty due to the general inability to perform
		 store forwarding from smaller stores to a larger load.  */
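	      /* Sketch of the fallback just described, with assumed types
		 (illustration only): for a group_size of 2 and a V4SI
		 vectype each two-element subvector is instead loaded as one
		 64-bit integer,

		   tmp1 = *(uint64 *) p;
		   tmp2 = *(uint64 *) (p + stride);
		   tmpv = {tmp1, tmp2};			// vector(2) uint64
		   vectemp = VIEW_CONVERT_EXPR<vector(4) int>(tmpv);

		 so no constructor of individual SImode elements is needed.  */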
9998 = vector_vector_composition_type (vectype
,
9999 const_nunits
/ group_size
,
10001 if (vtype
!= NULL_TREE
)
10003 nloads
= const_nunits
/ group_size
;
10012 lnel
= const_nunits
;
10015 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10017 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10018 else if (nloads
== 1)
10023 /* For SLP permutation support we need to load the whole group,
10024 not only the number of vector stmts the permutation result
10028 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10030 unsigned int const_vf
= vf
.to_constant ();
10031 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10032 dr_chain
.create (ncopies
);
10035 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10037 unsigned int group_el
= 0;
10038 unsigned HOST_WIDE_INT
10039 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10040 unsigned int n_groups
= 0;
10041 for (j
= 0; j
< ncopies
; j
++)
10043 if (nloads
> 1 && !costing_p
)
10044 vec_alloc (v
, nloads
);
10045 gimple
*new_stmt
= NULL
;
10046 for (i
= 0; i
< nloads
; i
++)
10050 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10051 avoid ICE, see PR110776. */
10052 if (VECTOR_TYPE_P (ltype
)
10053 && memory_access_type
!= VMAT_ELEMENTWISE
)
10054 vect_get_load_cost (vinfo
, stmt_info
, 1,
10055 alignment_support_scheme
, misalignment
,
10056 false, &inside_cost
, nullptr, cost_vec
,
10059 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10060 stmt_info
, 0, vect_body
);
10063 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10064 group_el
* elsz
+ cst_offset
);
10065 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10066 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10067 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10068 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10070 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10071 gimple_assign_lhs (new_stmt
));
10075 || group_el
== group_size
)
10078 /* When doing SLP make sure to not load elements from
10079 the next vector iteration, those will not be accessed
10080 so just use the last element again. See PR107451. */
10081 if (!slp
|| known_lt (n_groups
, vf
))
10083 tree newoff
= copy_ssa_name (running_off
);
10085 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10086 running_off
, stride_step
);
10087 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10088 running_off
= newoff
;
10097 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10098 stmt_info
, 0, vect_body
);
10101 tree vec_inv
= build_constructor (lvectype
, v
);
10102 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10104 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10105 if (lvectype
!= vectype
)
10108 = gimple_build_assign (make_ssa_name (vectype
),
10110 build1 (VIEW_CONVERT_EXPR
,
10111 vectype
, new_temp
));
10112 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10123 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10125 slp_node
->push_vec_def (new_stmt
);
10130 *vec_stmt
= new_stmt
;
10131 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10141 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10142 true, &n_perms
, &n_loads
);
10143 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10144 first_stmt_info
, 0, vect_body
);
10147 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10151 if (costing_p
&& dump_enabled_p ())
10152 dump_printf_loc (MSG_NOTE
, vect_location
,
10153 "vect_model_load_cost: inside_cost = %u, "
10154 "prologue_cost = 0 .\n",
10160 if (memory_access_type
== VMAT_GATHER_SCATTER
10161 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10162 grouped_load
= false;
10165 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10169 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10170 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10174 first_stmt_info
= stmt_info
;
10177 /* For SLP vectorization we directly vectorize a subchain
10178 without permutation. */
10179 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10180 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10181 /* For BB vectorization always use the first stmt to base
10182 the data ref pointer on. */
10184 first_stmt_info_for_drptr
10185 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10187 /* Check if the chain of loads is already vectorized. */
10188 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
10189 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10190 ??? But we can only do so if there is exactly one
10191 as we have no way to get at the rest. Leave the CSE
10193 ??? With the group load eventually participating
10194 in multiple different permutations (having multiple
10195 slp nodes which refer to the same group) the CSE
10196 is even wrong code. See PR56270. */
10199 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10202 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10205 /* VEC_NUM is the number of vect stmts to be created for this group. */
10208 grouped_load
= false;
      /* If an SLP permutation is from N elements to N elements,
	 and if one vector holds a whole number of N, we can load
	 the inputs to the permutation in the same way as an
	 unpermuted sequence.  In other cases we need to load the
	 whole group, not only the number of vector stmts the
	 permutation result fits in.  */
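      /* Example (numbers chosen for illustration, not taken from a target):
	 a permutation of 4 lanes into 4 lanes with V8SI vectors satisfies
	 both conditions below (group_size == scalar_lanes and nunits is a
	 multiple of group_size), so the unpermuted loads can be reused;
	 a 3-lane permutation of a 4-element group does not, and the whole
	 group has to be loaded.  */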
10215 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10217 && (group_size
!= scalar_lanes
10218 || !multiple_p (nunits
, group_size
)))
10220 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10221 variable VF; see vect_transform_slp_perm_load. */
10222 unsigned int const_vf
= vf
.to_constant ();
10223 unsigned int const_nunits
= nunits
.to_constant ();
10224 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10225 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10229 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10231 = group_size
- scalar_lanes
;
10235 vec_num
= group_size
;
10237 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10241 first_stmt_info
= stmt_info
;
10242 first_dr_info
= dr_info
;
10243 group_size
= vec_num
= 1;
10245 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10247 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10250 gcc_assert (alignment_support_scheme
);
10251 vec_loop_masks
*loop_masks
10252 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10253 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10255 vec_loop_lens
*loop_lens
10256 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10257 ? &LOOP_VINFO_LENS (loop_vinfo
)
10260 /* Shouldn't go with length-based approach if fully masked. */
10261 gcc_assert (!loop_lens
|| !loop_masks
);
10263 /* Targets with store-lane instructions must not require explicit
10264 realignment. vect_supportable_dr_alignment always returns either
10265 dr_aligned or dr_unaligned_supported for masked operations. */
10266 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
10269 || alignment_support_scheme
== dr_aligned
10270 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */

  /* In case of interleaving (non-unit grouped access):

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

        VS1: vx0 = &base
        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
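  /* Concrete instance of the interleaving scheme above, for illustration
     only (group size and vector width are assumptions): for a group of two
     interleaved loads a[2*i] and a[2*i+1] with V4SI vectors, two contiguous
     vector loads vx0 = {a0,a1,a2,a3} and vx1 = {a4,a5,a6,a7} are emitted,
     followed by

       vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>   // the even elements
       vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>   // the odd elements

     vx5 then stands for the vectorized a[2*i] accesses and vx6 for the
     vectorized a[2*i+1] accesses.  */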
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
10372 if (nested_in_vect_loop
10373 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10374 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10376 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10377 compute_in_loop
= true;
10380 bool diff_first_stmt_info
10381 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10383 tree offset
= NULL_TREE
;
10384 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10385 || alignment_support_scheme
== dr_explicit_realign
)
10386 && !compute_in_loop
)
10388 /* If we have different first_stmt_info, we can't set up realignment
10389 here, since we can't guarantee first_stmt_info DR has been
10390 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10391 distance from first_stmt_info DR instead as below. */
10394 if (!diff_first_stmt_info
)
10395 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10396 &realignment_token
,
10397 alignment_support_scheme
, NULL_TREE
,
10399 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10401 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10402 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10404 gcc_assert (!first_stmt_info_for_drptr
);
10411 if (!known_eq (poffset
, 0))
10413 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10414 : size_int (poffset
));
10417 tree vec_offset
= NULL_TREE
;
10418 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10420 aggr_type
= NULL_TREE
;
10423 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10425 aggr_type
= elem_type
;
10427 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10428 &bump
, &vec_offset
, loop_lens
);
10432 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10433 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10435 aggr_type
= vectype
;
10436 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10437 memory_access_type
, loop_lens
);
10440 auto_vec
<tree
> vec_offsets
;
10441 auto_vec
<tree
> vec_masks
;
10442 if (mask
&& !costing_p
)
10445 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10448 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10449 &vec_masks
, mask_vectype
);
10452 tree vec_mask
= NULL_TREE
;
10453 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10455 gcc_assert (alignment_support_scheme
== dr_aligned
10456 || alignment_support_scheme
== dr_unaligned_supported
);
10457 gcc_assert (grouped_load
&& !slp
);
10459 unsigned int inside_cost
= 0, prologue_cost
= 0;
10460 for (j
= 0; j
< ncopies
; j
++)
10464 /* An IFN_LOAD_LANES will load all its vector results,
10465 regardless of which ones we actually need. Account
10466 for the cost of unused results. */
10467 if (first_stmt_info
== stmt_info
)
10469 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10470 stmt_vec_info next_stmt_info
= first_stmt_info
;
10474 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10476 while (next_stmt_info
);
10479 if (dump_enabled_p ())
10480 dump_printf_loc (MSG_NOTE
, vect_location
,
10481 "vect_model_load_cost: %d "
10482 "unused vectors.\n",
10484 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10485 alignment_support_scheme
,
10486 misalignment
, false, &inside_cost
,
10487 &prologue_cost
, cost_vec
, cost_vec
,
10491 vect_get_load_cost (vinfo
, stmt_info
, 1, alignment_support_scheme
,
10492 misalignment
, false, &inside_cost
,
10493 &prologue_cost
, cost_vec
, cost_vec
, true);
10497 /* 1. Create the vector or array pointer update chain. */
10500 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10501 at_loop
, offset
, &dummy
, gsi
,
10502 &ptr_incr
, false, bump
);
10505 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10506 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10510 vec_mask
= vec_masks
[j
];
10512 tree vec_array
= create_vector_array (vectype
, vec_num
);
10514 tree final_mask
= NULL_TREE
;
10515 tree final_len
= NULL_TREE
;
10516 tree bias
= NULL_TREE
;
10518 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10519 ncopies
, vectype
, j
);
10521 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10524 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10527 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10528 ncopies
, vectype
, j
, 1);
10530 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10531 signed char biasval
10532 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10533 bias
= build_int_cst (intQI_type_node
, biasval
);
10536 mask_vectype
= truth_type_for (vectype
);
10537 final_mask
= build_minus_one_cst (mask_vectype
);
10542 if (final_len
&& final_mask
)
10545 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10546 VEC_MASK, LEN, BIAS). */
10547 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10548 tree alias_ptr
= build_int_cst (ref_type
, align
);
10549 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10550 dataref_ptr
, alias_ptr
,
10551 final_mask
, final_len
, bias
);
10553 else if (final_mask
)
10556 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10558 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10559 tree alias_ptr
= build_int_cst (ref_type
, align
);
10560 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10561 dataref_ptr
, alias_ptr
,
10567 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10568 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10569 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10571 gimple_call_set_lhs (call
, vec_array
);
10572 gimple_call_set_nothrow (call
, true);
10573 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10575 dr_chain
.create (vec_num
);
10576 /* Extract each vector into an SSA_NAME. */
10577 for (i
= 0; i
< vec_num
; i
++)
10579 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10581 dr_chain
.quick_push (new_temp
);
10584 /* Record the mapping between SSA_NAMEs and statements. */
10585 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10587 /* Record that VEC_ARRAY is now dead. */
10588 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10590 dr_chain
.release ();
10592 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10595 if (costing_p
&& dump_enabled_p ())
10596 dump_printf_loc (MSG_NOTE
, vect_location
,
10597 "vect_model_load_cost: inside_cost = %u, "
10598 "prologue_cost = %u .\n",
10599 inside_cost
, prologue_cost
);
10604 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10606 gcc_assert (alignment_support_scheme
== dr_aligned
10607 || alignment_support_scheme
== dr_unaligned_supported
);
10608 gcc_assert (!grouped_load
&& !slp_perm
);
10610 unsigned int inside_cost
= 0, prologue_cost
= 0;
10611 for (j
= 0; j
< ncopies
; j
++)
10613 /* 1. Create the vector or array pointer update chain. */
10614 if (j
== 0 && !costing_p
)
10616 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10617 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10618 slp_node
, &gs_info
, &dataref_ptr
,
10622 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10623 at_loop
, offset
, &dummy
, gsi
,
10624 &ptr_incr
, false, bump
);
10626 else if (!costing_p
)
10628 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10629 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10630 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10631 gsi
, stmt_info
, bump
);
10634 if (mask
&& !costing_p
)
10635 vec_mask
= vec_masks
[j
];
10637 gimple
*new_stmt
= NULL
;
10638 for (i
= 0; i
< vec_num
; i
++)
10640 tree final_mask
= NULL_TREE
;
10641 tree final_len
= NULL_TREE
;
10642 tree bias
= NULL_TREE
;
10647 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10648 vec_num
* ncopies
, vectype
,
10651 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10652 final_mask
, vec_mask
, gsi
);
10654 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10655 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10656 gsi
, stmt_info
, bump
);
10659 /* 2. Create the vector-load in the loop. */
10660 unsigned HOST_WIDE_INT align
;
10661 if (gs_info
.ifn
!= IFN_LAST
)
10665 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
10667 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
10668 stmt_info
, 0, vect_body
);
10671 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10672 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
10673 tree zero
= build_zero_cst (vectype
);
10674 tree scale
= size_int (gs_info
.scale
);
10676 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
10680 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10681 vec_num
* ncopies
, vectype
,
10682 vec_num
* j
+ i
, 1);
10685 = build_int_cst (sizetype
,
10686 TYPE_VECTOR_SUBPARTS (vectype
));
10687 signed char biasval
10688 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10689 bias
= build_int_cst (intQI_type_node
, biasval
);
10692 mask_vectype
= truth_type_for (vectype
);
10693 final_mask
= build_minus_one_cst (mask_vectype
);
10698 if (final_len
&& final_mask
)
10700 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
10701 dataref_ptr
, vec_offset
,
10702 scale
, zero
, final_mask
,
10704 else if (final_mask
)
10705 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
10706 dataref_ptr
, vec_offset
,
10707 scale
, zero
, final_mask
);
10709 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
10710 dataref_ptr
, vec_offset
,
10712 gimple_call_set_nothrow (call
, true);
10714 data_ref
= NULL_TREE
;
10718 /* Emulated gather-scatter. */
10719 gcc_assert (!final_mask
);
10720 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
10723 /* For emulated gathers N offset vector element
10724 offset add is consumed by the load). */
10725 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
10726 vec_to_scalar
, stmt_info
,
10728 /* N scalar loads plus gathering them into a
10731 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
10732 stmt_info
, 0, vect_body
);
10734 = record_stmt_cost (cost_vec
, 1, vec_construct
,
10735 stmt_info
, 0, vect_body
);
10738 unsigned HOST_WIDE_INT const_offset_nunits
10739 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
10741 vec
<constructor_elt
, va_gc
> *ctor_elts
;
10742 vec_alloc (ctor_elts
, const_nunits
);
10743 gimple_seq stmts
= NULL
;
10744 /* We support offset vectors with more elements
10745 than the data vector for now. */
10746 unsigned HOST_WIDE_INT factor
10747 = const_offset_nunits
/ const_nunits
;
10748 vec_offset
= vec_offsets
[j
/ factor
];
10749 unsigned elt_offset
= (j
% factor
) * const_nunits
;
10750 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
10751 tree scale
= size_int (gs_info
.scale
);
10752 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
10753 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
10754 for (unsigned k
= 0; k
< const_nunits
; ++k
)
10756 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
10757 bitsize_int (k
+ elt_offset
));
10759 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
10760 vec_offset
, TYPE_SIZE (idx_type
), boff
);
10761 idx
= gimple_convert (&stmts
, sizetype
, idx
);
10762 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
10764 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
10765 TREE_TYPE (dataref_ptr
),
10767 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
10768 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
10769 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
10770 build_int_cst (ref_type
, 0));
10771 new_stmt
= gimple_build_assign (elt
, ref
);
10772 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
10773 gimple_seq_add_stmt (&stmts
, new_stmt
);
10774 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
10776 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
10777 new_stmt
= gimple_build_assign (
10778 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
10779 data_ref
= NULL_TREE
;
10782 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10783 /* DATA_REF is null if we've already built the statement. */
10786 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10787 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10789 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10790 gimple_set_lhs (new_stmt
, new_temp
);
10791 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10793 /* Store vector loads in the corresponding SLP_NODE. */
10795 slp_node
->push_vec_def (new_stmt
);
10798 if (!slp
&& !costing_p
)
10799 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10802 if (!slp
&& !costing_p
)
10803 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10805 if (costing_p
&& dump_enabled_p ())
10806 dump_printf_loc (MSG_NOTE
, vect_location
,
10807 "vect_model_load_cost: inside_cost = %u, "
10808 "prologue_cost = %u .\n",
10809 inside_cost
, prologue_cost
);
10813 poly_uint64 group_elt
= 0;
10814 unsigned int inside_cost
= 0, prologue_cost
= 0;
10815 for (j
= 0; j
< ncopies
; j
++)
10817 /* 1. Create the vector or array pointer update chain. */
10818 if (j
== 0 && !costing_p
)
10820 bool simd_lane_access_p
10821 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
10822 if (simd_lane_access_p
10823 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
10824 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
10825 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
10826 && integer_zerop (DR_INIT (first_dr_info
->dr
))
10827 && alias_sets_conflict_p (get_alias_set (aggr_type
),
10828 get_alias_set (TREE_TYPE (ref_type
)))
10829 && (alignment_support_scheme
== dr_aligned
10830 || alignment_support_scheme
== dr_unaligned_supported
))
10832 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
10833 dataref_offset
= build_int_cst (ref_type
, 0);
10835 else if (diff_first_stmt_info
)
10838 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
10839 aggr_type
, at_loop
, offset
, &dummy
,
10840 gsi
, &ptr_incr
, simd_lane_access_p
,
10842 /* Adjust the pointer by the difference to first_stmt. */
10843 data_reference_p ptrdr
10844 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
10846 = fold_convert (sizetype
,
10847 size_binop (MINUS_EXPR
,
10848 DR_INIT (first_dr_info
->dr
),
10850 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10852 if (alignment_support_scheme
== dr_explicit_realign
)
10854 msq
= vect_setup_realignment (vinfo
,
10855 first_stmt_info_for_drptr
, gsi
,
10856 &realignment_token
,
10857 alignment_support_scheme
,
10858 dataref_ptr
, &at_loop
);
10859 gcc_assert (!compute_in_loop
);
10864 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10866 offset
, &dummy
, gsi
, &ptr_incr
,
10867 simd_lane_access_p
, bump
);
10869 vec_mask
= vec_masks
[0];
10871 else if (!costing_p
)
10873 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10874 if (dataref_offset
)
10875 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
10878 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10881 vec_mask
= vec_masks
[j
];
10884 if (grouped_load
|| slp_perm
)
10885 dr_chain
.create (vec_num
);
10887 gimple
*new_stmt
= NULL
;
10888 for (i
= 0; i
< vec_num
; i
++)
10890 tree final_mask
= NULL_TREE
;
10891 tree final_len
= NULL_TREE
;
10892 tree bias
= NULL_TREE
;
10896 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10897 vec_num
* ncopies
, vectype
,
10900 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10901 final_mask
, vec_mask
, gsi
);
10904 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10905 gsi
, stmt_info
, bump
);
10908 /* 2. Create the vector-load in the loop. */
10909 switch (alignment_support_scheme
)
10912 case dr_unaligned_supported
:
10917 unsigned int misalign
;
10918 unsigned HOST_WIDE_INT align
;
10919 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
10920 if (alignment_support_scheme
== dr_aligned
)
10922 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
10925 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
10929 misalign
= misalignment
;
10930 if (dataref_offset
== NULL_TREE
10931 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
10932 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
10934 align
= least_bit_hwi (misalign
| align
);
10936 /* Compute IFN when LOOP_LENS or final_mask valid. */
10937 machine_mode vmode
= TYPE_MODE (vectype
);
10938 machine_mode new_vmode
= vmode
;
10939 internal_fn partial_ifn
= IFN_LAST
;
10942 opt_machine_mode new_ovmode
10943 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
10944 new_vmode
= new_ovmode
.require ();
10946 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
10947 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10948 vec_num
* ncopies
, vectype
,
10949 vec_num
* j
+ i
, factor
);
10951 else if (final_mask
)
10953 if (!can_vec_mask_load_store_p (
10954 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
10956 gcc_unreachable ();
10959 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
10963 /* Pass VF value to 'len' argument of
10964 MASK_LEN_LOAD if LOOP_LENS is invalid. */
10965 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10969 /* Pass all ones value to 'mask' argument of
10970 MASK_LEN_LOAD if final_mask is invalid. */
10971 mask_vectype
= truth_type_for (vectype
);
10972 final_mask
= build_minus_one_cst (mask_vectype
);
10977 signed char biasval
10978 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10980 bias
= build_int_cst (intQI_type_node
, biasval
);
10985 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
10987 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
10988 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
10990 final_mask
, final_len
,
10993 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
10996 gimple_call_set_nothrow (call
, true);
10998 data_ref
= NULL_TREE
;
11000 /* Need conversion if it's wrapped with VnQI. */
11001 if (vmode
!= new_vmode
)
11003 tree new_vtype
= build_vector_type_for_mode (
11004 unsigned_intQI_type_node
, new_vmode
);
11006 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11007 gimple_set_lhs (call
, var
);
11008 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11010 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11011 new_stmt
= gimple_build_assign (vec_dest
,
11012 VIEW_CONVERT_EXPR
, op
);
11015 else if (final_mask
)
11017 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11018 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11021 gimple_call_set_nothrow (call
, true);
11023 data_ref
= NULL_TREE
;
11027 tree ltype
= vectype
;
11028 tree new_vtype
= NULL_TREE
;
11029 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11030 unsigned int vect_align
11031 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11032 unsigned int scalar_dr_size
11033 = vect_get_scalar_dr_size (first_dr_info
);
		  /* If there's no peeling for gaps but we have a gap
		     with slp loads then load the lower half of the
		     vector only.  See get_group_load_store_type for
		     when we apply this optimization.  */
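		  /* For illustration (sizes are assumptions, not from a
		     particular target): with a V4SI vectype, group_size == 4
		     and a trailing gap of 2, only the first two elements are
		     really needed, so a half-width (two-element) load is
		     emitted and widened back to V4SI with zero padding
		     instead of reading past the end of the group.  */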
11040 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && gap
!= 0
11041 && known_eq (nunits
, (group_size
- gap
) * 2)
11042 && known_eq (nunits
, group_size
)
11043 && gap
>= (vect_align
/ scalar_dr_size
))
11047 = vector_vector_composition_type (vectype
, 2,
11049 if (new_vtype
!= NULL_TREE
)
11050 ltype
= half_vtype
;
11053 = (dataref_offset
? dataref_offset
11054 : build_int_cst (ref_type
, 0));
11055 if (ltype
!= vectype
11056 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11058 unsigned HOST_WIDE_INT gap_offset
11059 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
11060 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
11061 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11064 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11065 if (alignment_support_scheme
== dr_aligned
)
11068 TREE_TYPE (data_ref
)
11069 = build_aligned_type (TREE_TYPE (data_ref
),
11070 align
* BITS_PER_UNIT
);
11071 if (ltype
!= vectype
)
11073 vect_copy_ref_info (data_ref
,
11074 DR_REF (first_dr_info
->dr
));
11075 tree tem
= make_ssa_name (ltype
);
11076 new_stmt
= gimple_build_assign (tem
, data_ref
);
11077 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11080 vec
<constructor_elt
, va_gc
> *v
;
11082 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11084 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11085 build_zero_cst (ltype
));
11086 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11090 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11091 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11092 build_zero_cst (ltype
));
11094 gcc_assert (new_vtype
!= NULL_TREE
);
11095 if (new_vtype
== vectype
)
11096 new_stmt
= gimple_build_assign (
11097 vec_dest
, build_constructor (vectype
, v
));
11100 tree new_vname
= make_ssa_name (new_vtype
);
11101 new_stmt
= gimple_build_assign (
11102 new_vname
, build_constructor (new_vtype
, v
));
11103 vect_finish_stmt_generation (vinfo
, stmt_info
,
11105 new_stmt
= gimple_build_assign (
11107 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
11113 case dr_explicit_realign
:
11119 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11121 if (compute_in_loop
)
11122 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
11123 &realignment_token
,
11124 dr_explicit_realign
,
11125 dataref_ptr
, NULL
);
11127 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11128 ptr
= copy_ssa_name (dataref_ptr
);
11130 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11131 // For explicit realign the target alignment should be
11132 // known at compile time.
11133 unsigned HOST_WIDE_INT align
11134 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11135 new_stmt
= gimple_build_assign (
11136 ptr
, BIT_AND_EXPR
, dataref_ptr
,
11137 build_int_cst (TREE_TYPE (dataref_ptr
),
11138 -(HOST_WIDE_INT
) align
));
11139 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11141 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11142 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11143 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11144 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11145 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11146 gimple_assign_set_lhs (new_stmt
, new_temp
);
11147 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
11148 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11151 bump
= size_binop (MULT_EXPR
, vs
, TYPE_SIZE_UNIT (elem_type
));
11152 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
11153 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
, stmt_info
,
11155 new_stmt
= gimple_build_assign (
11156 NULL_TREE
, BIT_AND_EXPR
, ptr
,
11157 build_int_cst (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
11158 if (TREE_CODE (ptr
) == SSA_NAME
)
11159 ptr
= copy_ssa_name (ptr
, new_stmt
);
11161 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
11162 gimple_assign_set_lhs (new_stmt
, ptr
);
11163 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11165 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11168 case dr_explicit_realign_optimized
:
11172 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11173 new_temp
= copy_ssa_name (dataref_ptr
);
11175 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11176 // We should only be doing this if we know the target
11177 // alignment at compile time.
11178 unsigned HOST_WIDE_INT align
11179 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11180 new_stmt
= gimple_build_assign (
11181 new_temp
, BIT_AND_EXPR
, dataref_ptr
,
11182 build_int_cst (TREE_TYPE (dataref_ptr
),
11183 -(HOST_WIDE_INT
) align
));
11184 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11185 data_ref
= build2 (MEM_REF
, vectype
, new_temp
,
11186 build_int_cst (ref_type
, 0));
11190 gcc_unreachable ();
11193 /* One common place to cost the above vect load for different
11194 alignment support schemes. */
11197 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11198 only need to take care of the first stmt, whose
11199 stmt_info is first_stmt_info, vec_num iterating on it
11200 will cover the cost for the remaining, it's consistent
11201 with transforming. For the prologue cost for realign,
11202 we only need to count it once for the whole group. */
11203 bool first_stmt_info_p
= first_stmt_info
== stmt_info
;
11204 bool add_realign_cost
= first_stmt_info_p
&& i
== 0;
11205 if (memory_access_type
== VMAT_CONTIGUOUS
11206 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11207 || (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
11208 && (!grouped_load
|| first_stmt_info_p
)))
11209 vect_get_load_cost (vinfo
, stmt_info
, 1,
11210 alignment_support_scheme
, misalignment
,
11211 add_realign_cost
, &inside_cost
,
11212 &prologue_cost
, cost_vec
, cost_vec
, true);
11216 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11217 /* DATA_REF is null if we've already built the statement. */
11220 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11221 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11223 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11224 gimple_set_lhs (new_stmt
, new_temp
);
11225 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
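	      /* Rough sketch of the scheme (illustrative only): msq holds the
		 vector loaded from the rounded-down address in the previous
		 iteration, lsq the one from the current rounded-down address;
		 REALIGN_LOAD_EXPR combines <msq, lsq> according to the
		 misalignment encoded in realignment_token so that vec_dest
		 contains the originally requested unaligned elements.  msq is
		 then carried to the next iteration via the loop-header PHI
		 created by vect_setup_realignment.  */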
11232 && (alignment_support_scheme
== dr_explicit_realign_optimized
11233 || alignment_support_scheme
== dr_explicit_realign
))
11235 lsq
= gimple_assign_lhs (new_stmt
);
11236 if (!realignment_token
)
11237 realignment_token
= dataref_ptr
;
11238 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11239 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
, msq
,
11240 lsq
, realignment_token
);
11241 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11242 gimple_assign_set_lhs (new_stmt
, new_temp
);
11243 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11245 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
11248 if (i
== vec_num
- 1 && j
== ncopies
- 1)
11249 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
11255 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11258 inside_cost
= record_stmt_cost (cost_vec
, 1, vec_perm
,
11259 stmt_info
, 0, vect_body
);
11262 tree perm_mask
= perm_mask_for_reverse (vectype
);
11263 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
11264 perm_mask
, stmt_info
, gsi
);
11265 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
11269 /* Collect vector loads and later create their permutation in
11270 vect_transform_grouped_load (). */
11271 if (!costing_p
&& (grouped_load
|| slp_perm
))
11272 dr_chain
.quick_push (new_temp
);
11274 /* Store vector loads in the corresponding SLP_NODE. */
11275 if (!costing_p
&& slp
&& !slp_perm
)
11276 slp_node
->push_vec_def (new_stmt
);
11278 /* With SLP permutation we load the gaps as well, without
11279 we need to skip the gaps after we manage to fully load
11280 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11281 group_elt
+= nunits
;
11283 && maybe_ne (group_gap_adj
, 0U)
11285 && known_eq (group_elt
, group_size
- group_gap_adj
))
11287 poly_wide_int bump_val
11288 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11289 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
)
11291 bump_val
= -bump_val
;
11292 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11293 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11298 /* Bump the vector pointer to account for a gap or for excess
11299 elements loaded for a permuted SLP load. */
11301 && maybe_ne (group_gap_adj
, 0U)
11304 poly_wide_int bump_val
11305 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11306 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
11307 bump_val
= -bump_val
;
11308 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11309 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11313 if (slp
&& !slp_perm
)
11319 /* For SLP we know we've seen all possible uses of dr_chain so
11320 direct vect_transform_slp_perm_load to DCE the unused parts.
11321 ??? This is a hack to prevent compile-time issues as seen
11322 in PR101120 and friends. */
11325 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, nullptr, vf
,
11326 true, &n_perms
, nullptr);
11327 inside_cost
= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
11328 stmt_info
, 0, vect_body
);
11332 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
11333 gsi
, vf
, false, &n_perms
,
11342 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11343 /* We assume that the cost of a single load-lanes instruction
11344 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11345 If a grouped access is instead being provided by a
11346 load-and-permute operation, include the cost of the
11348 if (costing_p
&& first_stmt_info
== stmt_info
)
11350 /* Uses an even and odd extract operations or shuffle
11351 operations for each needed permute. */
11352 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11353 int nstmts
= ceil_log2 (group_size
) * group_size
;
11354 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11355 stmt_info
, 0, vect_body
);
11357 if (dump_enabled_p ())
11358 dump_printf_loc (MSG_NOTE
, vect_location
,
11359 "vect_model_load_cost:"
11360 "strided group_size = %d .\n",
11363 else if (!costing_p
)
11365 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11367 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11370 else if (!costing_p
)
11371 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11373 dr_chain
.release ();
11375 if (!slp
&& !costing_p
)
11376 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11380 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11381 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11382 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11383 if (dump_enabled_p ())
11384 dump_printf_loc (MSG_NOTE
, vect_location
,
11385 "vect_model_load_cost: inside_cost = %u, "
11386 "prologue_cost = %u .\n",
11387 inside_cost
, prologue_cost
);
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
		     slp_tree slp_node, tree *comp_vectype,
		     enum vect_def_type *dts, tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  slp_tree slp_op;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
			       &slp_op, &dts[0], comp_vectype)
	  || !*comp_vectype
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
			       &lhs, &slp_op, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
			       &rhs, &slp_op, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
  if (! *comp_vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
	*comp_vectype = truth_type_for (vectype);
      else
	{
	  /* If we can widen the comparison to match vectype do so.  */
	  if (INTEGRAL_TYPE_P (scalar_type)
	      && !slp_node
	      && tree_int_cst_lt (TYPE_SIZE (scalar_type),
				  TYPE_SIZE (TREE_TYPE (vectype))))
	    scalar_type = build_nonstandard_integer_type
	      (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
	  *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
						       slp_node);
	}
    }

  return true;
}
/* vectorizable_condition.

   Check if STMT_INFO is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_condition (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt,
			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  int vec_num;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  int i;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  /* Is vectorizable conditional operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != COND_EXPR)
    return false;

  stmt_vec_info reduc_info = NULL;
  int reduc_index = -1;
  vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
  bool for_reduction
    = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
  if (for_reduction)
    {
      if (slp_node)
	return false;
      reduc_info = info_for_reduction (vinfo, stmt_info);
      reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
      reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
      gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
		  || reduc_index != -1);
    }
  else
    {
      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
	return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vec_num = 1;
    }

  gcc_assert (ncopies >= 1);
  if (for_reduction && ncopies > 1)
    return false; /* FORNOW */
11580 cond_expr
= gimple_assign_rhs1 (stmt
);
11582 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
11583 &comp_vectype
, &dts
[0], vectype
)
11587 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
11588 slp_tree then_slp_node
, else_slp_node
;
11589 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
11590 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
11592 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
11593 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
11596 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
11599 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
11602 masked
= !COMPARISON_CLASS_P (cond_expr
);
11603 vec_cmp_type
= truth_type_for (comp_vectype
);
11605 if (vec_cmp_type
== NULL_TREE
)
11608 cond_code
= TREE_CODE (cond_expr
);
11611 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
11612 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
/* For conditional reductions, the "then" value needs to be the candidate
   value calculated by this iteration while the "else" value needs to be
   the result carried over from previous iterations.  If the COND_EXPR
   is the other way around, we need to swap it.  */
11619 bool must_invert_cmp_result
= false;
11620 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
11623 must_invert_cmp_result
= true;
11626 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
11627 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
11628 if (new_code
== ERROR_MARK
)
11629 must_invert_cmp_result
= true;
11632 cond_code
= new_code
;
11633 /* Make sure we don't accidentally use the old condition. */
11634 cond_expr
= NULL_TREE
;
11637 std::swap (then_clause
, else_clause
);
11640 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
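/* For illustration (assumed mapping, see the switch below): on boolean
   vectors a comparison such as a != b reduces to a ^ b (bitop1 only),
   a == b becomes ~(a ^ b), and a > b becomes a & ~b, so no vector
   comparison instruction is needed for mask operands.  */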
11651 bitop1
= BIT_NOT_EXPR
;
11652 bitop2
= BIT_AND_EXPR
;
11655 bitop1
= BIT_NOT_EXPR
;
11656 bitop2
= BIT_IOR_EXPR
;
11659 bitop1
= BIT_NOT_EXPR
;
11660 bitop2
= BIT_AND_EXPR
;
11661 std::swap (cond_expr0
, cond_expr1
);
11664 bitop1
= BIT_NOT_EXPR
;
11665 bitop2
= BIT_IOR_EXPR
;
11666 std::swap (cond_expr0
, cond_expr1
);
11669 bitop1
= BIT_XOR_EXPR
;
11672 bitop1
= BIT_XOR_EXPR
;
11673 bitop2
= BIT_NOT_EXPR
;
11678 cond_code
= SSA_NAME
;
11681 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
11682 && reduction_type
== EXTRACT_LAST_REDUCTION
11683 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
11685 if (dump_enabled_p ())
11686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11687 "reduction comparison operation not supported.\n");
11693 if (bitop1
!= NOP_EXPR
)
11695 machine_mode mode
= TYPE_MODE (comp_vectype
);
11698 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
11699 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11702 if (bitop2
!= NOP_EXPR
)
11704 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
11706 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11711 vect_cost_for_stmt kind
= vector_stmt
;
11712 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11713 /* Count one reduction-like operation per vector. */
11714 kind
= vec_to_scalar
;
11715 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
11717 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
11719 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
11724 && (!vect_maybe_update_slp_op_vectype
11725 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
11727 && !vect_maybe_update_slp_op_vectype
11728 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
11729 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
11730 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
11732 if (dump_enabled_p ())
11733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11734 "incompatible vector types for invariants\n");
11738 if (loop_vinfo
&& for_reduction
11739 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
11741 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11743 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
11744 vectype
, OPTIMIZE_FOR_SPEED
))
11745 vect_record_loop_len (loop_vinfo
,
11746 &LOOP_VINFO_LENS (loop_vinfo
),
11747 ncopies
* vec_num
, vectype
, 1);
11749 vect_record_loop_mask (loop_vinfo
,
11750 &LOOP_VINFO_MASKS (loop_vinfo
),
11751 ncopies
* vec_num
, vectype
, NULL
);
11753 /* Extra inactive lanes should be safe for vect_nested_cycle. */
11754 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
11756 if (dump_enabled_p ())
11757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11758 "conditional reduction prevents the use"
11759 " of partial vectors.\n");
11760 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
11764 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
11765 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
11773 scalar_dest
= gimple_assign_lhs (stmt
);
11774 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
11775 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11777 bool swap_cond_operands
= false;
11779 /* See whether another part of the vectorized code applies a loop
11780 mask to the condition, or to its inverse. */
11782 vec_loop_masks
*masks
= NULL
;
11783 vec_loop_lens
*lens
= NULL
;
11784 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
11786 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11787 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
11789 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
11791 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11792 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
11795 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
11796 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
11797 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
11800 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
11801 tree_code orig_code
= cond
.code
;
11802 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
11803 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
11805 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
11806 cond_code
= cond
.code
;
11807 swap_cond_operands
= true;
/* Try the inverse of the current mask.  We check if the
   inverse mask is live and if so we generate a negate of
   the current mask such that we still honor NaNs.  */
11814 cond
.inverted_p
= true;
11815 cond
.code
= orig_code
;
11816 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
11818 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
11819 cond_code
= cond
.code
;
11820 swap_cond_operands
= true;
11821 must_invert_cmp_result
= true;
11828 /* Handle cond expr. */
11830 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
11831 cond_expr
, &vec_oprnds0
, comp_vectype
,
11832 then_clause
, &vec_oprnds2
, vectype
,
11833 reduction_type
!= EXTRACT_LAST_REDUCTION
11834 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
11836 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
11837 cond_expr0
, &vec_oprnds0
, comp_vectype
,
11838 cond_expr1
, &vec_oprnds1
, comp_vectype
,
11839 then_clause
, &vec_oprnds2
, vectype
,
11840 reduction_type
!= EXTRACT_LAST_REDUCTION
11841 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
11843 /* Arguments are ready. Create the new vector stmt. */
11844 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
11846 vec_then_clause
= vec_oprnds2
[i
];
11847 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
11848 vec_else_clause
= vec_oprnds3
[i
];
11850 if (swap_cond_operands
)
11851 std::swap (vec_then_clause
, vec_else_clause
);
11854 vec_compare
= vec_cond_lhs
;
11857 vec_cond_rhs
= vec_oprnds1
[i
];
11858 if (bitop1
== NOP_EXPR
)
11860 gimple_seq stmts
= NULL
;
11861 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
11862 vec_cond_lhs
, vec_cond_rhs
);
11863 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
11867 new_temp
= make_ssa_name (vec_cmp_type
);
11869 if (bitop1
== BIT_NOT_EXPR
)
11870 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
11874 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
11876 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11877 if (bitop2
== NOP_EXPR
)
11878 vec_compare
= new_temp
;
11879 else if (bitop2
== BIT_NOT_EXPR
11880 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
11882 /* Instead of doing ~x ? y : z do x ? z : y. */
11883 vec_compare
= new_temp
;
11884 std::swap (vec_then_clause
, vec_else_clause
);
11888 vec_compare
= make_ssa_name (vec_cmp_type
);
11889 if (bitop2
== BIT_NOT_EXPR
)
11891 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
11894 = gimple_build_assign (vec_compare
, bitop2
,
11895 vec_cond_lhs
, new_temp
);
11896 vect_finish_stmt_generation (vinfo
, stmt_info
,
/* If we decided to apply a loop mask to the result of the vector
   comparison, AND the comparison with the mask now.  Later passes
   should then be able to reuse the AND results between multiple
   vector statements.

   For example:
   for (int i = 0; i < 100; ++i)
     x[i] = y[i] ? z[i] : 10;

   results in following optimized GIMPLE:

   mask__35.8_43 = vect__4.7_41 != { 0, ... };
   vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
   _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
   vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
   vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
                                     vect_iftmp.11_47, { 10, ... }>;

   instead of using masked and unmasked forms of
   vec != { 0, ... } (masked in the MASK_LOAD,
   unmasked in the VEC_COND_EXPR).  */

/* Force vec_compare to be an SSA_NAME rather than a comparison,
   in cases where that's necessary.  */
11927 tree len
= NULL_TREE
, bias
= NULL_TREE
;
11928 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
11930 if (!is_gimple_val (vec_compare
))
11932 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
11933 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
11935 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11936 vec_compare
= vec_compare_name
;
11939 if (must_invert_cmp_result
)
11941 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
11942 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
11945 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11946 vec_compare
= vec_compare_name
;
11949 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
11950 vectype
, OPTIMIZE_FOR_SPEED
))
11954 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
11955 vec_num
* ncopies
, vectype
, i
, 1);
11956 signed char biasval
11957 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11958 bias
= build_int_cst (intQI_type_node
, biasval
);
11962 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11963 bias
= build_int_cst (intQI_type_node
, 0);
11969 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
11971 tree tmp2
= make_ssa_name (vec_cmp_type
);
11973 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
11975 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
11976 vec_compare
= tmp2
;
11981 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
11983 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
11984 tree lhs
= gimple_get_lhs (old_stmt
);
11986 new_stmt
= gimple_build_call_internal
11987 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, else_clause
, vec_compare
,
11988 vec_then_clause
, len
, bias
);
11990 new_stmt
= gimple_build_call_internal
11991 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
11993 gimple_call_set_lhs (new_stmt
, lhs
);
11994 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
11995 if (old_stmt
== gsi_stmt (*gsi
))
11996 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
/* In this case we're moving the definition to later in the
   block.  That doesn't matter because the only uses of the
   lhs are in phi statements.  */
12002 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12003 gsi_remove (&old_gsi
, true);
12004 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12009 new_temp
= make_ssa_name (vec_dest
);
12010 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12011 vec_then_clause
, vec_else_clause
);
12012 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12015 slp_node
->push_vec_def (new_stmt
);
12017 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12021 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12023 vec_oprnds0
.release ();
12024 vec_oprnds1
.release ();
12025 vec_oprnds2
.release ();
12026 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
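/* Illustrative example (hypothetical names): a statement like

       _1 = a_2 < b_3;

   where _1 feeds e.g. a COND_EXPR or a masked operation is vectorized
   here into a vector comparison producing a boolean vector (mask),
   e.g. mask_4 = va_5 < vb_6 with a VECTOR_BOOLEAN_TYPE_P result type.  */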
12040 vectorizable_comparison (vec_info
*vinfo
,
12041 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12043 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12045 tree lhs
, rhs1
, rhs2
;
12046 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12047 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12048 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12050 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12051 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12053 poly_uint64 nunits
;
12055 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12057 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12058 vec
<tree
> vec_oprnds0
= vNULL
;
12059 vec
<tree
> vec_oprnds1
= vNULL
;
12063 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12066 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12069 mask_type
= vectype
;
12070 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12075 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12077 gcc_assert (ncopies
>= 1);
12078 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12081 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12085 code
= gimple_assign_rhs_code (stmt
);
12087 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12090 slp_tree slp_rhs1
, slp_rhs2
;
12091 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12092 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12095 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12096 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12099 if (vectype1
&& vectype2
12100 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12101 TYPE_VECTOR_SUBPARTS (vectype2
)))
12104 vectype
= vectype1
? vectype1
: vectype2
;
12106 /* Invariant comparison. */
12109 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12110 vectype
= mask_type
;
12112 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12114 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12117 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12120 /* Can't compare mask and non-mask types. */
12121 if (vectype1
&& vectype2
12122 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or
   rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
12132 bool swap_p
= false;
12133 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12135 if (code
== GT_EXPR
)
12137 bitop1
= BIT_NOT_EXPR
;
12138 bitop2
= BIT_AND_EXPR
;
12140 else if (code
== GE_EXPR
)
12142 bitop1
= BIT_NOT_EXPR
;
12143 bitop2
= BIT_IOR_EXPR
;
12145 else if (code
== LT_EXPR
)
12147 bitop1
= BIT_NOT_EXPR
;
12148 bitop2
= BIT_AND_EXPR
;
12151 else if (code
== LE_EXPR
)
12153 bitop1
= BIT_NOT_EXPR
;
12154 bitop2
= BIT_IOR_EXPR
;
12159 bitop1
= BIT_XOR_EXPR
;
12160 if (code
== EQ_EXPR
)
12161 bitop2
= BIT_NOT_EXPR
;
12167 if (bitop1
== NOP_EXPR
)
12169 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12174 machine_mode mode
= TYPE_MODE (vectype
);
12177 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12178 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12181 if (bitop2
!= NOP_EXPR
)
12183 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12184 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12189 /* Put types on constant and invariant SLP children. */
12191 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12192 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12194 if (dump_enabled_p ())
12195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12196 "incompatible vector types for invariants\n");
12200 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
12201 vect_model_simple_cost (vinfo
, stmt_info
,
12202 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12203 dts
, ndts
, slp_node
, cost_vec
);
12210 lhs
= gimple_assign_lhs (stmt
);
12211 mask
= vect_create_destination_var (lhs
, mask_type
);
12213 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12214 rhs1
, &vec_oprnds0
, vectype
,
12215 rhs2
, &vec_oprnds1
, vectype
);
12217 std::swap (vec_oprnds0
, vec_oprnds1
);
12219 /* Arguments are ready. Create the new vector stmt. */
12220 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12223 vec_rhs2
= vec_oprnds1
[i
];
12225 new_temp
= make_ssa_name (mask
);
12226 if (bitop1
== NOP_EXPR
)
12228 new_stmt
= gimple_build_assign (new_temp
, code
,
12229 vec_rhs1
, vec_rhs2
);
12230 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12234 if (bitop1
== BIT_NOT_EXPR
)
12235 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12237 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12239 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12240 if (bitop2
!= NOP_EXPR
)
12242 tree res
= make_ssa_name (mask
);
12243 if (bitop2
== BIT_NOT_EXPR
)
12244 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12246 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12248 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12252 slp_node
->push_vec_def (new_stmt
);
12254 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12258 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12260 vec_oprnds0
.release ();
12261 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   VEC_STMT_P is as for vectorizable_live_operation.  */
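/* A "live" statement here is one whose scalar result is used outside the
   vectorized region, e.g. (illustrative only, hypothetical names)

       for (i = 0; i < n; ++i)
         last = a[i];
       use (last);

   where the final value of LAST has to be extracted from the vector
   after the loop.  */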
12272 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12273 slp_tree slp_node
, slp_instance slp_node_instance
,
12275 stmt_vector_for_cost
*cost_vec
)
12279 stmt_vec_info slp_stmt_info
;
12281 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
12283 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
12284 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
12285 slp_node_instance
, i
,
12286 vec_stmt_p
, cost_vec
))
12290 else if (STMT_VINFO_LIVE_P (stmt_info
)
12291 && !vectorizable_live_operation (vinfo
, stmt_info
,
12292 slp_node
, slp_node_instance
, -1,
12293 vec_stmt_p
, cost_vec
))
12299 /* Make sure the statement is vectorizable. */
12302 vect_analyze_stmt (vec_info
*vinfo
,
12303 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
12304 slp_tree node
, slp_instance node_instance
,
12305 stmt_vector_for_cost
*cost_vec
)
12307 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12308 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
12310 gimple_seq pattern_def_seq
;
12312 if (dump_enabled_p ())
12313 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
12316 if (gimple_has_volatile_ops (stmt_info
->stmt
))
12317 return opt_result::failure_at (stmt_info
->stmt
,
12319 " stmt has volatile operands: %G\n",
12322 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12324 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
12326 gimple_stmt_iterator si
;
12328 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
12330 stmt_vec_info pattern_def_stmt_info
12331 = vinfo
->lookup_stmt (gsi_stmt (si
));
12332 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
12333 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
12335 /* Analyze def stmt of STMT if it's a pattern stmt. */
12336 if (dump_enabled_p ())
12337 dump_printf_loc (MSG_NOTE
, vect_location
,
12338 "==> examining pattern def statement: %G",
12339 pattern_def_stmt_info
->stmt
);
12342 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
12343 need_to_vectorize
, node
, node_instance
,
/* Skip stmts that do not need to be vectorized.  In loops this is expected
   to include:
   - the COND_EXPR which is the loop exit condition
   - any LABEL_EXPRs in the loop
   - computations that are used only for array indexing or loop control.
   In basic blocks we only analyze statements that are a part of some SLP
   instance, therefore, all the statements are relevant.

   A pattern statement needs to be analyzed instead of the original statement
   if the original statement is not relevant.  Otherwise, we analyze both
   statements.  In basic blocks we are called from some SLP instance
   traversal; there we don't analyze pattern stmts, since the pattern stmts
   will already be part of an SLP instance.  */
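/* For instance (illustrative, hypothetical names), in

       for (i = 0; i < n; ++i)
         a[i] = b[i] + 1;

   the increment and the exit comparison of the induction variable I are
   neither relevant nor live: they only control the loop and index the
   arrays, so they are skipped below.  */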
12365 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
12366 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
12367 && !STMT_VINFO_LIVE_P (stmt_info
))
12369 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12370 && pattern_stmt_info
12371 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12372 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12374 /* Analyze PATTERN_STMT instead of the original stmt. */
12375 stmt_info
= pattern_stmt_info
;
12376 if (dump_enabled_p ())
12377 dump_printf_loc (MSG_NOTE
, vect_location
,
12378 "==> examining pattern statement: %G",
12383 if (dump_enabled_p ())
12384 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
12386 return opt_result::success ();
12389 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12391 && pattern_stmt_info
12392 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12393 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12395 /* Analyze PATTERN_STMT too. */
12396 if (dump_enabled_p ())
12397 dump_printf_loc (MSG_NOTE
, vect_location
,
12398 "==> examining pattern statement: %G",
12399 pattern_stmt_info
->stmt
);
12402 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
12403 node_instance
, cost_vec
);
12408 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
12410 case vect_internal_def
:
12413 case vect_reduction_def
:
12414 case vect_nested_cycle
:
12415 gcc_assert (!bb_vinfo
12416 && (relevance
== vect_used_in_outer
12417 || relevance
== vect_used_in_outer_by_reduction
12418 || relevance
== vect_used_by_reduction
12419 || relevance
== vect_unused_in_scope
12420 || relevance
== vect_used_only_live
));
12423 case vect_induction_def
:
12424 case vect_first_order_recurrence
:
12425 gcc_assert (!bb_vinfo
);
12428 case vect_constant_def
:
12429 case vect_external_def
:
12430 case vect_unknown_def_type
:
12432 gcc_unreachable ();
12435 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12437 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
12439 if (STMT_VINFO_RELEVANT_P (stmt_info
))
12441 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
12442 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
12443 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
12444 *need_to_vectorize
= true;
12447 if (PURE_SLP_STMT (stmt_info
) && !node
)
12449 if (dump_enabled_p ())
12450 dump_printf_loc (MSG_NOTE
, vect_location
,
12451 "handled only by SLP analysis\n");
12452 return opt_result::success ();
12457 && (STMT_VINFO_RELEVANT_P (stmt_info
)
12458 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
/* Prefer vectorizable_call over vectorizable_simd_clone_call so
   -mveclibabi= takes preference over library functions with
   the simd attribute.  */
12462 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12463 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
12465 || vectorizable_conversion (vinfo
, stmt_info
,
12466 NULL
, NULL
, node
, cost_vec
)
12467 || vectorizable_operation (vinfo
, stmt_info
,
12468 NULL
, NULL
, node
, cost_vec
)
12469 || vectorizable_assignment (vinfo
, stmt_info
,
12470 NULL
, NULL
, node
, cost_vec
)
12471 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12472 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12473 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12474 node
, node_instance
, cost_vec
)
12475 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12476 NULL
, node
, cost_vec
)
12477 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12478 || vectorizable_condition (vinfo
, stmt_info
,
12479 NULL
, NULL
, node
, cost_vec
)
12480 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12482 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
12483 stmt_info
, NULL
, node
)
12484 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
12485 stmt_info
, NULL
, node
, cost_vec
));
12489 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12490 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
12491 NULL
, NULL
, node
, cost_vec
)
12492 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
12494 || vectorizable_shift (vinfo
, stmt_info
,
12495 NULL
, NULL
, node
, cost_vec
)
12496 || vectorizable_operation (vinfo
, stmt_info
,
12497 NULL
, NULL
, node
, cost_vec
)
12498 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
12500 || vectorizable_load (vinfo
, stmt_info
,
12501 NULL
, NULL
, node
, cost_vec
)
12502 || vectorizable_store (vinfo
, stmt_info
,
12503 NULL
, NULL
, node
, cost_vec
)
12504 || vectorizable_condition (vinfo
, stmt_info
,
12505 NULL
, NULL
, node
, cost_vec
)
12506 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12508 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
12512 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
12515 return opt_result::failure_at (stmt_info
->stmt
,
12517 " relevant stmt not supported: %G",
/* Stmts that are (also) "live" (i.e. used outside the loop)
   need extra handling, except for vectorizable reductions.  */
12523 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
12524 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
12525 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
12526 stmt_info
, node
, node_instance
,
12528 return opt_result::failure_at (stmt_info
->stmt
,
12530 " live stmt not supported: %G",
12533 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
12542 vect_transform_stmt (vec_info
*vinfo
,
12543 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12544 slp_tree slp_node
, slp_instance slp_node_instance
)
12546 bool is_store
= false;
12547 gimple
*vec_stmt
= NULL
;
12550 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
12552 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12554 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
12556 switch (STMT_VINFO_TYPE (stmt_info
))
12558 case type_demotion_vec_info_type
:
12559 case type_promotion_vec_info_type
:
12560 case type_conversion_vec_info_type
:
12561 done
= vectorizable_conversion (vinfo
, stmt_info
,
12562 gsi
, &vec_stmt
, slp_node
, NULL
);
12566 case induc_vec_info_type
:
12567 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
12568 stmt_info
, &vec_stmt
, slp_node
,
12573 case shift_vec_info_type
:
12574 done
= vectorizable_shift (vinfo
, stmt_info
,
12575 gsi
, &vec_stmt
, slp_node
, NULL
);
12579 case op_vec_info_type
:
12580 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
12585 case assignment_vec_info_type
:
12586 done
= vectorizable_assignment (vinfo
, stmt_info
,
12587 gsi
, &vec_stmt
, slp_node
, NULL
);
12591 case load_vec_info_type
:
12592 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
12597 case store_vec_info_type
:
12598 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
12600 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
12601 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
/* In case of interleaving, the whole chain is vectorized when the
   last store in the chain is reached.  Store stmts before the last
   one are skipped, and there vec_stmt_info shouldn't be freed
   meanwhile.  */
12609 done
= vectorizable_store (vinfo
, stmt_info
,
12610 gsi
, &vec_stmt
, slp_node
, NULL
);
12616 case condition_vec_info_type
:
12617 done
= vectorizable_condition (vinfo
, stmt_info
,
12618 gsi
, &vec_stmt
, slp_node
, NULL
);
12622 case comparison_vec_info_type
:
12623 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
12628 case call_vec_info_type
:
12629 done
= vectorizable_call (vinfo
, stmt_info
,
12630 gsi
, &vec_stmt
, slp_node
, NULL
);
12633 case call_simd_clone_vec_info_type
:
12634 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
12638 case reduc_vec_info_type
:
12639 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12640 gsi
, &vec_stmt
, slp_node
);
12644 case cycle_phi_info_type
:
12645 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12646 &vec_stmt
, slp_node
, slp_node_instance
);
12650 case lc_phi_info_type
:
12651 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
12652 stmt_info
, &vec_stmt
, slp_node
);
12656 case recurr_info_type
:
12657 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
12658 stmt_info
, &vec_stmt
, slp_node
, NULL
);
12662 case phi_info_type
:
12663 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
12668 if (!STMT_VINFO_LIVE_P (stmt_info
))
12670 if (dump_enabled_p ())
12671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12672 "stmt not supported.\n");
12673 gcc_unreachable ();
12678 if (!slp_node
&& vec_stmt
)
12679 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
12681 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
12683 /* Handle stmts whose DEF is used outside the loop-nest that is
12684 being vectorized. */
12685 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
12686 slp_node_instance
, true, NULL
);
12691 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */
12701 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
12703 stmt_vec_info next_stmt_info
= first_stmt_info
;
12705 while (next_stmt_info
)
12707 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
12708 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
12709 /* Free the attached stmt_vec_info and remove the stmt. */
12710 vinfo
->remove_stmt (next_stmt_info
);
12711 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
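/* For example (illustrative and target-dependent): with a prevailing mode
   of V16QImode and a 4-byte int SCALAR_TYPE, NUNITS == 0 would typically
   yield a V4SImode vector type, while NUNITS == 2 would request a
   two-element vector of ints if the target supports one.  */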
12731 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
12732 tree scalar_type
, poly_uint64 nunits
)
12734 tree orig_scalar_type
= scalar_type
;
12735 scalar_mode inner_mode
;
12736 machine_mode simd_mode
;
12739 if ((!INTEGRAL_TYPE_P (scalar_type
)
12740 && !POINTER_TYPE_P (scalar_type
)
12741 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
12742 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
12743 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
12746 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* Interoperability between modes requires one to be a constant multiple
   of the other, so that the number of vectors required for each operation
   is a compile-time constant.  */
12751 if (prevailing_mode
!= VOIDmode
12752 && !constant_multiple_p (nunits
* nbytes
,
12753 GET_MODE_SIZE (prevailing_mode
))
12754 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
/* For vector types of elements whose mode precision doesn't
   match their type's precision we use an element type of mode
   precision.  The vectorization routines will have to make sure
   they support the proper result truncation/extension.
   We also make sure to build vector types with INTEGER_TYPE
   component type only.  */
12764 if (INTEGRAL_TYPE_P (scalar_type
)
12765 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
12766 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
12767 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
12768 TYPE_UNSIGNED (scalar_type
));
/* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
   When the component mode passes the above test simply use a type
   corresponding to that mode.  The theory is that any use that
   would cause problems with this will disable vectorization anyway.  */
12774 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
12775 && !INTEGRAL_TYPE_P (scalar_type
))
12776 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
/* We can't build a vector type of elements with alignment bigger than
   their size.  */
12780 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
12781 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
12782 TYPE_UNSIGNED (scalar_type
));
/* If we fell back to using the mode, fail if there was
   no scalar type for it.  */
12786 if (scalar_type
== NULL_TREE
)
/* If no prevailing mode was supplied, use the mode the target prefers.
   Otherwise look up a vector mode based on the prevailing mode.  */
12791 if (prevailing_mode
== VOIDmode
)
12793 gcc_assert (known_eq (nunits
, 0U));
12794 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
12795 if (SCALAR_INT_MODE_P (simd_mode
))
/* Traditional behavior is not to take the integer mode
   literally, but simply to use it as a way of determining
   the vector size.  It is up to mode_for_vector to decide
   what the TYPE_MODE should be.

   Note that nunits == 1 is allowed in order to support single
   element vector types.  */
12804 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
12805 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
12809 else if (SCALAR_INT_MODE_P (prevailing_mode
)
12810 || !related_vector_mode (prevailing_mode
,
12811 inner_mode
, nunits
).exists (&simd_mode
))
/* Fall back to using mode_for_vector, mostly in the hope of being
   able to use an integer mode.  */
12815 if (known_eq (nunits
, 0U)
12816 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
12819 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
12823 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
/* In cases where the mode was chosen by mode_for_vector, check that
   the target actually supports the chosen mode, or that it at least
   allows the vector mode to be replaced by a like-sized integer.  */
12828 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
12829 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Re-attach the address-space qualifier if we canonicalized the scalar
   type.  */
12834 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
12835 return build_qualified_type
12836 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
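/* E.g. (illustrative, target-dependent): for a BB group of only 2 int
   stores on a target whose natural vector would hold 8 ints, the
   GROUP_SIZE cap below retries with 2 elements so that a two-element
   vector type is chosen instead of an 8-element vector the group could
   never fill.  */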
12849 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
12850 unsigned int group_size
)
/* For BB vectorization, we should always have a group size once we've
   constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   are tentative requests during things like early data reference
   analysis and pattern recognition.  */
12856 if (is_a
<bb_vec_info
> (vinfo
))
12857 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12861 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
12863 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
12864 vinfo
->vector_mode
= TYPE_MODE (vectype
);
12866 /* Register the natural choice of vector type, before the group size
12867 has been applied. */
12869 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
12871 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12872 try again with an explicit number of elements. */
12875 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
/* Start with the biggest number of units that fits within
   GROUP_SIZE and halve it until we find a valid vector type.
   Usually either the first attempt will succeed or all will
   fail (in the latter case because GROUP_SIZE is too small
   for the target), but it's possible that a target could have
   a hole between supported vector types.

   If GROUP_SIZE is not a power of 2, this has the effect of
   trying the largest power of 2 that fits within the group,
   even though the group is not a multiple of that vector size.
   The BB vectorizer will then try to carve up the group into
   smaller pieces.  */
12889 unsigned int nunits
= 1 << floor_log2 (group_size
);
12892 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
12893 scalar_type
, nunits
);
12896 while (nunits
> 1 && !vectype
);
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */
12907 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
12909 unsigned int group_size
= 0;
12911 group_size
= SLP_TREE_LANES (node
);
12912 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of the specified SCALAR_TYPE as supported by the target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */
12924 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
12925 unsigned int group_size
)
12927 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12932 return truth_type_for (vectype
);
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */
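/* Illustrative example: calling get_same_sized_vectype with a 2-byte
   short scalar type and a 16-byte V4SI vector type asks for an
   8-element vector of shorts occupying the same number of bytes,
   assuming the target provides such a type.  */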
12941 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
12943 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
12944 return truth_type_for (vector_type
);
12946 poly_uint64 nunits
;
12947 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
12948 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
12951 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
12952 scalar_type
, nunits
);
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */
12959 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
12961 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
12962 i
!= vinfo
->used_vector_modes
.end (); ++i
)
12963 if (!VECTOR_MODE_P (*i
)
12964 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
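/* For example (illustrative, hypothetical names), when analysing

       for (i = 0; i < n; ++i)
         a[i] = b[i] * x + 4;

   the value loaded from b[i] is a vect_internal_def (defined inside the
   vectorized region), X is a vect_external_def (loop invariant) and 4 is
   a vect_constant_def; an operand carried over from a previous iteration
   would instead be classified as a reduction/induction definition.  */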
12990 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
12991 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
12993 if (def_stmt_info_out
)
12994 *def_stmt_info_out
= NULL
;
12996 *def_stmt_out
= NULL
;
12997 *dt
= vect_unknown_def_type
;
12999 if (dump_enabled_p ())
13001 dump_printf_loc (MSG_NOTE
, vect_location
,
13002 "vect_is_simple_use: operand ");
13003 if (TREE_CODE (operand
) == SSA_NAME
13004 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
13005 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
13007 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
13010 if (CONSTANT_CLASS_P (operand
))
13011 *dt
= vect_constant_def
;
13012 else if (is_gimple_min_invariant (operand
))
13013 *dt
= vect_external_def
;
13014 else if (TREE_CODE (operand
) != SSA_NAME
)
13015 *dt
= vect_unknown_def_type
;
13016 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
13017 *dt
= vect_external_def
;
13020 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
13021 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
13023 *dt
= vect_external_def
;
13026 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
13027 def_stmt
= stmt_vinfo
->stmt
;
13028 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
13029 if (def_stmt_info_out
)
13030 *def_stmt_info_out
= stmt_vinfo
;
13033 *def_stmt_out
= def_stmt
;
13036 if (dump_enabled_p ())
13038 dump_printf (MSG_NOTE
, ", type of def: ");
13041 case vect_uninitialized_def
:
13042 dump_printf (MSG_NOTE
, "uninitialized\n");
13044 case vect_constant_def
:
13045 dump_printf (MSG_NOTE
, "constant\n");
13047 case vect_external_def
:
13048 dump_printf (MSG_NOTE
, "external\n");
13050 case vect_internal_def
:
13051 dump_printf (MSG_NOTE
, "internal\n");
13053 case vect_induction_def
:
13054 dump_printf (MSG_NOTE
, "induction\n");
13056 case vect_reduction_def
:
13057 dump_printf (MSG_NOTE
, "reduction\n");
13059 case vect_double_reduction_def
:
13060 dump_printf (MSG_NOTE
, "double reduction\n");
13062 case vect_nested_cycle
:
13063 dump_printf (MSG_NOTE
, "nested cycle\n");
13065 case vect_first_order_recurrence
:
13066 dump_printf (MSG_NOTE
, "first order recurrence\n");
13068 case vect_unknown_def_type
:
13069 dump_printf (MSG_NOTE
, "unknown\n");
13074 if (*dt
== vect_unknown_def_type
)
13076 if (dump_enabled_p ())
13077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13078 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   operand.  */
13095 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13096 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
13097 gimple
**def_stmt_out
)
13099 stmt_vec_info def_stmt_info
;
13101 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
13105 *def_stmt_out
= def_stmt
;
13106 if (def_stmt_info_out
)
13107 *def_stmt_info_out
= def_stmt_info
;
/* Now get a vector type if the def is internal, otherwise supply
   NULL_TREE and leave it up to the caller to figure out a proper
   type for the use stmt.  */
13112 if (*dt
== vect_internal_def
13113 || *dt
== vect_induction_def
13114 || *dt
== vect_reduction_def
13115 || *dt
== vect_double_reduction_def
13116 || *dt
== vect_nested_cycle
13117 || *dt
== vect_first_order_recurrence
)
13119 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
13120 gcc_assert (*vectype
!= NULL_TREE
);
13121 if (dump_enabled_p ())
13122 dump_printf_loc (MSG_NOTE
, vect_location
,
13123 "vect_is_simple_use: vectype %T\n", *vectype
);
13125 else if (*dt
== vect_uninitialized_def
13126 || *dt
== vect_constant_def
13127 || *dt
== vect_external_def
)
13128 *vectype
= NULL_TREE
;
13130 gcc_unreachable ();
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */
13142 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
13143 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
13144 enum vect_def_type
*dt
,
13145 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
13149 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
13151 *vectype
= SLP_TREE_VECTYPE (child
);
13152 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
13154 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
13155 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
13159 if (def_stmt_info_out
)
13160 *def_stmt_info_out
= NULL
;
13161 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
13162 *dt
= SLP_TREE_DEF_TYPE (child
);
13169 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
13171 if (gimple_assign_rhs_code (ass
) == COND_EXPR
13172 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
13175 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
13177 *op
= gimple_op (ass
, operand
);
13179 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
13180 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
13182 *op
= gimple_op (ass
, operand
+ 1);
13184 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
13185 *op
= gimple_call_arg (call
, operand
);
13187 gcc_unreachable ();
13188 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
/* If OP is not NULL and is external or constant, update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */
13197 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
13199 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
13201 if (SLP_TREE_VECTYPE (op
))
13202 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
/* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
   should be handled by patterns.  Allow vect_constant_def for now.  */
13205 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
13206 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
13208 SLP_TREE_VECTYPE (op
) = vectype
;
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
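/* Illustrative example: widening a conversion from char to int typically
   needs one intermediate step via short (MULTI_STEP_CVT == 1, with the
   short vector type recorded in INTERM_TYPES); on many targets CODE1/CODE2
   are then the VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair, so each input
   vector produces two wider result vectors.  */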
13234 supportable_widening_operation (vec_info
*vinfo
,
13236 stmt_vec_info stmt_info
,
13237 tree vectype_out
, tree vectype_in
,
13238 code_helper
*code1
,
13239 code_helper
*code2
,
13240 int *multi_step_cvt
,
13241 vec
<tree
> *interm_types
)
13243 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
13244 class loop
*vect_loop
= NULL
;
13245 machine_mode vec_mode
;
13246 enum insn_code icode1
, icode2
;
13247 optab optab1
= unknown_optab
, optab2
= unknown_optab
;
13248 tree vectype
= vectype_in
;
13249 tree wide_vectype
= vectype_out
;
13250 tree_code c1
= MAX_TREE_CODES
, c2
= MAX_TREE_CODES
;
13252 tree prev_type
, intermediate_type
;
13253 machine_mode intermediate_mode
, prev_mode
;
13254 optab optab3
, optab4
;
13256 *multi_step_cvt
= 0;
13258 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
13260 switch (code
.safe_as_tree_code ())
13262 case MAX_TREE_CODES
:
13263 /* Don't set c1 and c2 if code is not a tree_code. */
13266 case WIDEN_MULT_EXPR
:
/* The result of a vectorized widening operation usually requires
   two vectors (because the widened results do not fit into one vector).
   The generated vector results would normally be expected to be
   generated in the same order as in the original scalar computation,
   i.e. if 8 results are generated in each vector iteration, they are
   to be organized as follows:
        vect1: [res1,res2,res3,res4],
        vect2: [res5,res6,res7,res8].

   However, in the special case that the result of the widening
   operation is used in a reduction computation only, the order doesn't
   matter (because when vectorizing a reduction we change the order of
   the computation).  Some targets can take advantage of this and
   generate more efficient code.  For example, targets like Altivec,
   that support widen_mult using a sequence of {mult_even,mult_odd}
   generate the following vectors:
        vect1: [res1,res3,res5,res7],
        vect2: [res2,res4,res6,res8].

   When vectorizing outer-loops, we execute the inner-loop sequentially
   (each vectorized inner-loop iteration contributes to VF outer-loop
   iterations in parallel).  We therefore do not allow changing the
   order of the computation in the inner-loop during outer-loop
   vectorization.  */
/* TODO: Another case in which order doesn't *really* matter is when we
   widen and then contract again, e.g. (short)((int)x * y >> 8).
   Normally, pack_trunc performs an even/odd permute, whereas the
   repack from an even/odd expansion would be an interleave, which
   would be significantly simpler for e.g. AVX2.  */
/* In any case, in order to avoid duplicating the code below, recurse
   on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
   are properly set up for the caller.  If we fail, we'll continue with
   a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
13301 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
13302 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
13303 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
13304 stmt_info
, vectype_out
,
13306 code2
, multi_step_cvt
,
/* Elements in a vector with the vect_used_by_reduction property cannot
   be reordered if the use chain with this property does not have the
   same operation.  One such example is s += a * b, where elements
   in a and b cannot be reordered.  Here we check if the vector defined
   by STMT is only directly used in the reduction statement.  */
13314 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
13315 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
13317 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
13320 c1
= VEC_WIDEN_MULT_LO_EXPR
;
13321 c2
= VEC_WIDEN_MULT_HI_EXPR
;
13324 case DOT_PROD_EXPR
:
13325 c1
= DOT_PROD_EXPR
;
13326 c2
= DOT_PROD_EXPR
;
13334 case VEC_WIDEN_MULT_EVEN_EXPR
:
13335 /* Support the recursion induced just above. */
13336 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
13337 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
13340 case WIDEN_LSHIFT_EXPR
:
13341 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
13342 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
13346 c1
= VEC_UNPACK_LO_EXPR
;
13347 c2
= VEC_UNPACK_HI_EXPR
;
13351 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
13352 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
13355 case FIX_TRUNC_EXPR
:
13356 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
13357 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
13361 gcc_unreachable ();
13364 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
13365 std::swap (c1
, c2
);
13367 if (code
== FIX_TRUNC_EXPR
)
13369 /* The signedness is determined from output operand. */
13370 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
13371 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
13373 else if (CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ())
13374 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
13375 && VECTOR_BOOLEAN_TYPE_P (vectype
)
13376 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
13377 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
13379 /* If the input and result modes are the same, a different optab
13380 is needed where we pass in the number of units in vectype. */
13381 optab1
= vec_unpacks_sbool_lo_optab
;
13382 optab2
= vec_unpacks_sbool_hi_optab
;
13385 vec_mode
= TYPE_MODE (vectype
);
13386 if (widening_fn_p (code
))
13388 /* If this is an internal fn then we must check whether the target
13389 supports either a low-high split or an even-odd split. */
13390 internal_fn ifn
= as_internal_fn ((combined_fn
) code
);
13392 internal_fn lo
, hi
, even
, odd
;
13393 lookup_hilo_internal_fn (ifn
, &lo
, &hi
);
13394 *code1
= as_combined_fn (lo
);
13395 *code2
= as_combined_fn (hi
);
13396 optab1
= direct_internal_fn_optab (lo
, {vectype
, vectype
});
13397 optab2
= direct_internal_fn_optab (hi
, {vectype
, vectype
});
13399 /* If we don't support low-high, then check for even-odd. */
13401 || (icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
13403 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
13405 lookup_evenodd_internal_fn (ifn
, &even
, &odd
);
13406 *code1
= as_combined_fn (even
);
13407 *code2
= as_combined_fn (odd
);
13408 optab1
= direct_internal_fn_optab (even
, {vectype
, vectype
});
13409 optab2
= direct_internal_fn_optab (odd
, {vectype
, vectype
});
  else if (code.is_tree_code ())
    {
      if (code == FIX_TRUNC_EXPR)
	{
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
	  optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
	}
      else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
	       && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	       && VECTOR_BOOLEAN_TYPE_P (vectype)
	       && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	       && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab1 = vec_unpacks_sbool_lo_optab;
	  optab2 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab1 = optab_for_tree_code (c1, vectype, optab_default);
	  optab2 = optab_for_tree_code (c2, vectype, optab_default);
	}
      *code1 = c1;
      *code2 = c2;
    }

  if (!optab1 || !optab2)
    return false;

  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;
  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }
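
  /* Example of the situation described above (illustrative only): with
     integer mask modes a 4-lane and an 8-lane boolean vector can both be
     represented in QImode, so the TYPE_VECTOR_SUBPARTS comparison is what
     distinguishes them.  */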
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
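
  /* For example (illustration only): promoting V16QI inputs to V4SI results
     goes V16QI -> V8HI -> V4SI; the V8HI vector type is recorded in
     INTERM_TYPES and *MULTI_STEP_CVT is bumped once for the extra step.  */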
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else if (VECTOR_MODE_P (intermediate_mode))
	{
	  tree intermediate_element_type
	    = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
					      TYPE_UNSIGNED (prev_type));
	  intermediate_type
	    = build_vector_type_for_mode (intermediate_element_type,
					  intermediate_mode);
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
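
/* Worked example of the interface above (illustration only): narrowing
   V4SI values down to V16QI results takes two packing steps,
   V4SI x 2 -> V8HI and V8HI x 2 -> V16QI, so CODE1 becomes
   VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
   V8HI vector type.  */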

bool
supportable_narrowing_operation (code_helper code,
				 tree vectype_out, tree vectype_in,
				 code_helper *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  if (!code.is_tree_code ())
    return false;

  *multi_step_cvt = 0;
  switch ((tree_code) code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;
  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && SCALAR_INT_MODE_P (prev_mode)
	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
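
/* Illustration of the mask semantics (a sketch, not generated output):
   with START_INDEX = 5, END_INDEX = 8 and an 8-lane MASK_TYPE the
   IFN_WHILE_ULT call yields { 1, 1, 1, 0, 0, 0, 0, 0 }, i.e. lane I is
   set exactly when 5 + I < 8.  */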

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
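
/* Worked example of the formulas above (numbers purely illustrative):
   with START_INDEX = 3, END_INDEX = 10 and LEN_LIMIT = 4 we get
   min_of_start_and_end = 3, left_len = 7 and LEN = min (7, 4) = 4,
   so the generated length never exceeds either LEN_LIMIT or the number
   of remaining iterations.  */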