1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
// Plain accessor: reads the vector type recorded in the stmt_vec_info
// during analysis.  NOTE(review): the return-type line and the braces
// were lost in extraction (original line numbers jump 64->67); verify
// against upstream tree-vect-stmts.cc.
67 stmt_vectype (class _stmt_vec_info
*stmt_info
)
69 return STMT_VINFO_VECTYPE (stmt_info
);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
// NOTE(review): original lines 80-84 are missing here (line-number gap
// 79->85); presumably they held the early-out checks (e.g. a NULL
// loop_vinfo) — confirm against upstream before relying on this text.
75 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
77 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
78 basic_block bb
= gimple_bb (stmt
);
// Only loop vectorization has an inner loop, hence the dyn_cast of the
// generic vec_info to loop_vec_info.
79 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
85 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
// The stmt is in the inner loop iff its block's loop_father is the
// inner loop of the loop nest being vectorized.
87 return (bb
->loop_father
== loop
->inner
);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
// Main overload.  BODY_COST_VEC collects the entries; COUNT is the
// number of copies of the stmt, KIND classifies the operation for the
// target cost model, WHERE says prologue/body/epilogue.
95 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
96 enum vect_cost_for_stmt kind
,
97 stmt_vec_info stmt_info
, slp_tree node
,
98 tree vectype
, int misalign
,
99 enum vect_cost_model_location where
)
// Refine plain (un)aligned load/store kinds to their gather/scatter
// variants when the stmt is known to be a gather or scatter access, so
// the target can price them accordingly.
101 if ((kind
== vector_load
|| kind
== unaligned_load
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_gather_load
;
104 if ((kind
== vector_store
|| kind
== unaligned_store
)
105 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
106 kind
= vector_scatter_store
;
// Queue the cost entry for later processing by the target cost model.
108 stmt_info_for_cost si
109 = { count
, kind
, where
, stmt_info
, node
, vectype
, misalign
};
110 body_cost_vec
->safe_push (si
);
// Preliminary estimate from the generic builtin vectorization cost.
// NOTE(review): the 'return' keyword line appears to have been lost in
// extraction (gap 110->113).
113 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
// Overload: record a cost against STMT_INFO with no SLP node.
// Forwards to the main record_stmt_cost with NODE == NULL.
117 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
118 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
119 tree vectype
, int misalign
,
120 enum vect_cost_model_location where
)
122 return record_stmt_cost (body_cost_vec
, count
, kind
, stmt_info
, NULL
,
123 vectype
, misalign
, where
);
// Overload: record a cost against an SLP node with no stmt_vec_info.
// Forwards to the main record_stmt_cost with STMT_INFO == NULL.
127 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
128 enum vect_cost_for_stmt kind
, slp_tree node
,
129 tree vectype
, int misalign
,
130 enum vect_cost_model_location where
)
132 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, node
,
133 vectype
, misalign
, where
);
// Overload for costs not tied to any statement or SLP node; only
// branch and scalar-stmt kinds are meaningful here, which the assert
// enforces.  Forwards with NULL stmt/node, NULL_TREE vectype and zero
// misalignment.
137 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
138 enum vect_cost_for_stmt kind
,
139 enum vect_cost_model_location where
)
141 gcc_assert (kind
== cond_branch_taken
|| kind
== cond_branch_not_taken
142 || kind
== scalar_stmt
);
143 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, NULL
,
144 NULL_TREE
, 0, where
);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
// Creates a fresh temporary whose type is an NELEMS-element array of
// ELEM_TYPE.  NOTE(review): the second argument to create_tmp_var (the
// variable-name prefix, on the continuation of original line 152) was
// lost in extraction; verify against upstream.
150 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
152 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
162 read_vector_array (vec_info
*vinfo
,
163 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
164 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
166 tree vect_type
, vect
, vect_name
, array_ref
;
// ARRAY must really be an array; its element type is the vector type
// we are about to load.
169 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
170 vect_type
= TREE_TYPE (TREE_TYPE (array
));
171 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
// Build ARRAY[N]; the two trailing NULL_TREEs are ARRAY_REF's optional
// lower-bound and element-size operands (defaults used).
172 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
173 build_int_cst (size_type_node
, n
),
174 NULL_TREE
, NULL_TREE
);
// Assign the element to a fresh SSA name and emit the load at *GSI.
176 new_stmt
= gimple_build_assign (vect
, array_ref
);
177 vect_name
= make_ssa_name (vect
, new_stmt
);
178 gimple_assign_set_lhs (new_stmt
, vect_name
);
179 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
// Mirror of read_vector_array: builds ARRAY[N] and assigns VECT into
// it, emitting the store at *GSI.
189 write_vector_array (vec_info
*vinfo
,
190 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
191 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
196 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
197 build_int_cst (size_type_node
, n
),
198 NULL_TREE
, NULL_TREE
);
200 new_stmt
= gimple_build_assign (array_ref
, vect
);
201 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
// Builds a MEM_REF of TYPE at PTR with a zero offset carrying
// ALIAS_PTR_TYPE for TBAA purposes.  NOTE(review): the tail of the doc
// comment and the 'return mem_ref;' line are not visible (line gaps
// 205->209 and after 215); verify upstream.
209 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
213 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
// A clobber assignment marks the end of VAR's lifetime so later passes
// can reuse its stack slot.
223 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
224 gimple_stmt_iterator
*gsi
, tree var
)
226 tree clobber
= build_clobber (TREE_TYPE (var
));
227 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
228 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
// RELEVANT/LIVE_P are merged monotonically into the stmt's existing
// flags; the stmt is pushed on WORKLIST only if something changed.
238 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
239 enum vect_relevant relevant
, bool live_p
)
// Remember the current flags so we can detect a no-op update below.
241 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
242 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE
, vect_location
,
246 "mark relevant %d, live %d: %G", relevant
, live_p
,
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE
, vect_location
,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
// Redirect to the pattern stmt; the related-stmt links must point at
// each other, which the assert checks.
265 stmt_vec_info old_stmt_info
= stmt_info
;
266 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
268 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
269 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
// A live-but-unused original stmt forces the pattern stmt to be
// treated as used-only-live.
271 if (live_p
&& relevant
== vect_unused_in_scope
)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE
, vect_location
,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
277 relevant
= vect_used_only_live
;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE
, vect_location
,
282 "mark relevant %d, live %d: %G", relevant
, live_p
,
// Merge: liveness is sticky (OR), relevance only ever increases (MAX).
286 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
287 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
288 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
// If nothing changed the stmt was already processed — do not requeue,
// this bounds the worklist iteration.
290 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE
, vect_location
,
295 "already marked relevant/live.\n");
299 worklist
->safe_push (stmt_info
);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
// "Invariant" here means every SSA use of the assignment is either a
// loop-external or constant definition.  NOTE(review): several lines
// (incl. the early return for a non-gassign and the final returns) were
// lost in extraction; verify against upstream.
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
309 loop_vec_info loop_vinfo
)
314 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
// Walk all SSA uses of the assignment.
318 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
320 enum vect_def_type dt
= vect_uninitialized_def
;
322 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "use not simple.\n");
// Any use defined inside the loop (not external/constant) makes the
// stmt non-invariant.
330 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
// Outputs: *RELEVANT is the computed relevance class, *LIVE_P is set
// when the stmt has uses outside the loop.  NOTE(review): some lines
// (e.g. the initialisation of *live_p, original 355-357) are missing
// from this extraction; verify upstream.
349 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
350 enum vect_relevant
*relevant
, bool *live_p
)
352 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
354 imm_use_iterator imm_iter
;
358 *relevant
= vect_unused_in_scope
;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info
->stmt
)
363 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
364 *relevant
= vect_used_in_scope
;
366 /* changing memory. */
367 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
// Clobbers are lifetime markers, not real stores — ignore them.
368 if (gimple_vdef (stmt_info
->stmt
)
369 && !gimple_clobber_p (stmt_info
->stmt
))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE
, vect_location
,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant
= vect_used_in_scope
;
377 /* uses outside the loop. */
// Scan immediate uses of every def; a non-debug use outside the loop
// makes the stmt "live".
378 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
380 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
382 basic_block bb
= gimple_bb (USE_STMT (use_p
))
Hmm
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
// Used by process_use to skip address-computation-only uses.
// NOTE(review): several 'return' lines are missing from this
// extraction (visible line-number gaps); verify upstream.
421 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info
))
431 /* STMT has a data_ref. FORNOW this means that its of one of
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
445 if (!assign
|| !gimple_assign_copy_p (assign
))
// Internal calls (masked/gather/scatter loads and stores) carry
// non-indexing operands too: the mask, the stored value, and for
// gather/scatter the offset vector (arg 1).
447 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
448 if (call
&& gimple_call_internal_p (call
))
450 internal_fn ifn
= gimple_call_internal_fn (call
);
451 int mask_index
= internal_fn_mask_index (ifn
);
453 && use
== gimple_call_arg (call
, mask_index
))
455 int stored_value_index
= internal_fn_stored_value_index (ifn
);
456 if (stored_value_index
>= 0
457 && use
== gimple_call_arg (call
, stored_value_index
))
459 if (internal_gather_scatter_fn_p (ifn
)
460 && use
== gimple_call_arg (call
, 1))
// Plain copy assignment: USE is non-indexing iff it is the copied
// SSA operand itself.
466 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
468 operand
= gimple_assign_rhs1 (assign
);
469 if (TREE_CODE (operand
) != SSA_NAME
)
// NOTE(review): the '/*' opener of this function's doc comment (and
// several other lines) were lost in extraction; the text below through
// "Return false otherwise. */" is the original documentation.
480 Function process_use.
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT cause it had already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
507 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
508 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
511 stmt_vec_info dstmt_vinfo
;
512 enum vect_def_type dt
;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
520 return opt_result::failure_at (stmt_vinfo
->stmt
,
522 " unsupported use in stmt.\n");
// A use without a defining stmt in the region needs no marking.
525 return opt_result::success ();
527 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
528 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
535 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
537 && bb
->loop_father
== def_bb
->loop_father
)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE
, vect_location
,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
// live_p forced true — see the comment above about the epilogue.
542 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
// Def in an outer loop feeding a stmt in the inner loop: translate
// RELEVANT across the nest boundary (switch body below).
553 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE
, vect_location
,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
561 case vect_unused_in_scope
:
562 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
563 vect_used_in_scope
: vect_unused_in_scope
;
566 case vect_used_in_outer_by_reduction
:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
568 relevant
= vect_used_by_reduction
;
571 case vect_used_in_outer
:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
573 relevant
= vect_used_in_scope
;
576 case vect_used_in_scope
:
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
// Def in the inner loop feeding a stmt in the outer loop: again
// translate RELEVANT across the boundary.
591 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE
, vect_location
,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
599 case vect_unused_in_scope
:
600 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
602 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
605 case vect_used_by_reduction
:
606 case vect_used_only_live
:
607 relevant
= vect_used_in_outer_by_reduction
;
610 case vect_used_in_scope
:
611 relevant
= vect_used_in_outer
;
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
622 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
626 loop_latch_edge (bb
->loop_father
))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE
, vect_location
,
631 "induction value on backedge.\n");
632 return opt_result::success ();
// Default: propagate RELEVANT to the defining stmt (not live).
636 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
// Worklist algorithm: seed with stmts that are relevant on their own
// (vect_stmt_relevant_p), then propagate relevance backwards through
// uses via process_use until a fixed point.  NOTE(review): many lines
// are missing from this extraction (e.g. the 'fatal' handling and
// several closing braces); verify against upstream.
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
660 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
661 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
662 unsigned int nbbs
= loop
->num_nodes
;
663 gimple_stmt_iterator si
;
667 enum vect_relevant relevant
;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec
<stmt_vec_info
, 64> worklist
;
673 /* 1. Init worklist. */
674 for (i
= 0; i
< nbbs
; i
++)
// PHIs first, then the ordinary stmts of each block.
677 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
679 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
684 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
685 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
687 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
// Debug stmts never influence vectorization decisions.
689 if (is_gimple_debug (gsi_stmt (si
)))
691 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE
, vect_location
,
694 "init: stmt relevant? %G", stmt_info
->stmt
);
696 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
697 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
701 /* 2. Process_worklist */
702 while (worklist
.length () > 0)
707 stmt_vec_info stmt_vinfo
= worklist
.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE
, vect_location
,
710 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
715 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
// Validate the observed relevance against the stmt's def type; any
// combination outside the allowed set is an unsupported use and fails
// the whole analysis.
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
730 case vect_reduction_def
:
731 gcc_assert (relevant
!= vect_unused_in_scope
);
732 if (relevant
!= vect_unused_in_scope
733 && relevant
!= vect_used_in_scope
734 && relevant
!= vect_used_by_reduction
735 && relevant
!= vect_used_only_live
)
736 return opt_result::failure_at
737 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
740 case vect_nested_cycle
:
741 if (relevant
!= vect_unused_in_scope
742 && relevant
!= vect_used_in_outer_by_reduction
743 && relevant
!= vect_used_in_outer
)
744 return opt_result::failure_at
745 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
748 case vect_double_reduction_def
:
749 if (relevant
!= vect_unused_in_scope
750 && relevant
!= vect_used_by_reduction
751 && relevant
!= vect_used_only_live
)
752 return opt_result::failure_at
753 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
760 if (is_pattern_stmt_p (stmt_vinfo
))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
767 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
768 tree op
= gimple_assign_rhs1 (assign
);
// COND_EXPRs embed a comparison whose two operands must be walked
// explicitly.
771 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
774 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
775 loop_vinfo
, relevant
, &worklist
, false);
778 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
779 loop_vinfo
, relevant
, &worklist
, false);
784 for (; i
< gimple_num_ops (assign
); i
++)
786 op
= gimple_op (assign
, i
);
787 if (TREE_CODE (op
) == SSA_NAME
)
790 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
797 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
799 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
801 tree arg
= gimple_call_arg (call
, i
);
803 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
// Non-pattern stmts: the normal SSA operand walk suffices.
811 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
813 tree op
= USE_FROM_PTR (use_p
);
815 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
// Gather/scatter offsets are not ordinary SSA operands of the stmt,
// so process that use separately (force = implied by caller below).
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
823 gather_scatter_info gs_info
;
824 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
827 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
// NOTE(review): parameters ndts/node and some guard lines are not
// visible in this extraction (line gaps 850->853, 860->862); verify
// against upstream.
848 vect_model_simple_cost (vec_info
*,
849 stmt_vec_info stmt_info
, int ncopies
,
850 enum vect_def_type
*dt
,
853 stmt_vector_for_cost
*cost_vec
,
854 vect_cost_for_stmt kind
= vector_stmt
)
856 int inside_cost
= 0, prologue_cost
= 0;
858 gcc_assert (cost_vec
!= NULL
);
860 /* ??? Somehow we need to fix this at the callers. */
// For SLP the number of vector stmts comes from the node, overriding
// the NCOPIES argument.
862 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
868 for (int i
= 0; i
< ndts
; i
++)
869 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
870 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
871 stmt_info
, 0, vect_prologue
);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
875 stmt_info
, 0, vect_body
);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE
, vect_location
,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
// NOTE(review): the widen_arith parameter line and the doubling of
// ncopies per step (original lines after 896 and inside the loop) are
// missing from this extraction; verify upstream.
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
894 enum vect_def_type
*dt
,
895 unsigned int ncopies
, int pwr
,
896 stmt_vector_for_cost
*cost_vec
,
900 int inside_cost
= 0, prologue_cost
= 0;
// One cost contribution per promotion/demotion step (PWR + 1 steps).
902 for (i
= 0; i
< pwr
+ 1; i
++)
904 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
906 ? vector_stmt
: vec_promote_demote
,
907 stmt_info
, 0, vect_body
);
911 /* FORNOW: Assuming maximum 2 args per stmts. */
// Constant/external operands need a one-off prologue broadcast.
912 for (i
= 0; i
< 2; i
++)
913 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
914 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
915 stmt_info
, 0, vect_prologue
);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE
, vect_location
,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
923 /* Returns true if the current function returns DECL. */
// Walks every predecessor of the exit block looking for a GIMPLE
// return of DECL, either directly or via an aggregate copy into the
// result decl.  NOTE(review): several lines (loop body braces, the
// do/while opener, the return statements) are missing from this
// extraction; verify against upstream.
926 cfun_returns (tree decl
)
930 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
// The last stmt of an exit predecessor should be the return, if any.
932 greturn
*ret
= safe_dyn_cast
<greturn
*> (*gsi_last_bb (e
->src
));
935 if (gimple_return_retval (ret
) == decl
)
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
943 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
945 while (gimple_clobber_p (def
));
946 if (is_a
<gassign
*> (def
)
947 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
948 && gimple_assign_rhs1 (def
) == decl
)
954 /* Calculate cost of DR's memory access. */
// Store flavour: dispatches on the alignment support scheme and
// accumulates into *INSIDE_COST.  NOTE(review): the dr_aligned case
// label, misalignment parameter line and break statements are not
// visible in this extraction; verify upstream.
956 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
957 dr_alignment_support alignment_support_scheme
,
959 unsigned int *inside_cost
,
960 stmt_vector_for_cost
*body_cost_vec
)
962 switch (alignment_support_scheme
)
// Aligned store: plain vector_store cost.
966 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
967 vector_store
, stmt_info
, 0,
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE
, vect_location
,
972 "vect_model_store_cost: aligned.\n");
976 case dr_unaligned_supported
:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
980 unaligned_store
, stmt_info
,
981 misalignment
, vect_body
);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE
, vect_location
,
984 "vect_model_store_cost: unaligned supported by "
989 case dr_unaligned_unsupported
:
// Prohibitive cost to make the vectorizer reject this access.
991 *inside_cost
= VECT_MAX_COST
;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
995 "vect_model_store_cost: unsupported access.\n");
1004 /* Calculate cost of DR's memory access. */
// Load flavour of the cost dispatch.  ADD_REALIGN_COST and
// RECORD_PROLOGUE_COSTS gate the one-off realignment setup costs for
// the software-pipelined (explicit realign optimized) scheme.
// NOTE(review): the dr_aligned case label, misalignment parameter and
// break statements are not visible in this extraction; verify upstream.
1006 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1007 dr_alignment_support alignment_support_scheme
,
1009 bool add_realign_cost
, unsigned int *inside_cost
,
1010 unsigned int *prologue_cost
,
1011 stmt_vector_for_cost
*prologue_cost_vec
,
1012 stmt_vector_for_cost
*body_cost_vec
,
1013 bool record_prologue_costs
)
1015 switch (alignment_support_scheme
)
// Aligned load: plain vector_load cost.
1019 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1020 stmt_info
, 0, vect_body
);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE
, vect_location
,
1024 "vect_model_load_cost: aligned.\n");
1028 case dr_unaligned_supported
:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1032 unaligned_load
, stmt_info
,
1033 misalignment
, vect_body
);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE
, vect_location
,
1037 "vect_model_load_cost: unaligned supported by "
1042 case dr_explicit_realign
:
// Explicit realign: two loads plus a permute per vector.
1044 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1045 vector_load
, stmt_info
, 0, vect_body
);
1046 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1047 vec_perm
, stmt_info
, 0, vect_body
);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1052 if (targetm
.vectorize
.builtin_mask_for_load
)
1053 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1054 stmt_info
, 0, vect_body
);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE
, vect_location
,
1058 "vect_model_load_cost: explicit realign\n");
1062 case dr_explicit_realign_optimized
:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE
, vect_location
,
1066 "vect_model_load_cost: unaligned software "
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost
&& record_prologue_costs
)
1078 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1079 vector_stmt
, stmt_info
,
1081 if (targetm
.vectorize
.builtin_mask_for_load
)
1082 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1083 vector_stmt
, stmt_info
,
// Steady-state body: one load plus one permute per vector.
1087 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1088 stmt_info
, 0, vect_body
);
1089 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1090 stmt_info
, 0, vect_body
);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE
, vect_location
,
1094 "vect_model_load_cost: explicit realign optimized"
1100 case dr_unaligned_unsupported
:
// Prohibitive cost to make the vectorizer reject this access.
1102 *inside_cost
= VECT_MAX_COST
;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1106 "vect_model_load_cost: unsupported access.\n");
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
// NOTE(review): the 'if (gsi)' / 'else' lines selecting between the
// two insertion paths were lost in extraction (gaps 1120->1123->1125);
// verify against upstream.
1119 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1120 gimple_stmt_iterator
*gsi
)
// Path 1: emit at the given iterator, inside the loop body.
1123 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
// Path 2: no iterator — hoist to the region entry (loop preheader).
1125 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE
, vect_location
,
1129 "created new init_stmt: %G", new_stmt
);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
// NOTE(review): local declarations, braces and the final
// 'return new_temp;' are missing from this extraction (line-number
// gaps); verify against upstream.
1143 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1144 gimple_stmt_iterator
*gsi
)
1149 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1152 gcc_assert (VECTOR_TYPE_P (type
));
// VAL's scalar type differs from TYPE's element type — convert VAL
// before broadcasting.
1153 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type
))
1159 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1160 tree false_val
= build_zero_cst (TREE_TYPE (type
));
// Constant booleans can be canonicalized directly ...
1162 if (CONSTANT_CLASS_P (val
))
1163 val
= integer_zerop (val
) ? false_val
: true_val
;
// ... non-constants need a COND_EXPR selecting all-ones/all-zeros.
1166 new_temp
= make_ssa_name (TREE_TYPE (type
));
1167 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1168 val
, true_val
, false_val
);
1169 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
// Non-boolean element mismatch: VIEW_CONVERT non-integral values,
// then convert to the element type, emitting each generated stmt.
1175 gimple_seq stmts
= NULL
;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1177 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1178 TREE_TYPE (type
), val
);
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
// Move each conversion stmt out of the temporary sequence and insert
// it via vect_init_vector_1 (at *GSI or on region entry).
1183 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1184 !gsi_end_p (gsi2
); )
1186 init_stmt
= gsi_stmt (gsi2
);
1187 gsi_remove (&gsi2
, false);
1188 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
// Broadcast the (now element-typed) scalar into a vector constant.
1192 val
= build_vector_from_val (type
, val
);
// Materialize VAL in a fresh SSA name and emit the init stmt.
1195 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1196 init_stmt
= gimple_build_assign (new_temp
, val
);
1197 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
/* NOTE(review): braces and a few declarations (e.g. of def_stmt, ncopies)
   are missing from this extraction.  */
1215 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1217 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1220 enum vect_def_type dt
;
1222 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE
, vect_location
,
1226 "vect_get_vec_defs_for_operand: %T\n", op
);
/* Classify OP (constant, external, internal def, ...); operands reaching
   here must always be analyzable, hence the assert.  */
1228 stmt_vec_info def_stmt_info
;
1229 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1230 &def_stmt_info
, &def_stmt
);
1231 gcc_assert (is_simple_use
);
1232 if (def_stmt
&& dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1235 vec_oprnds
->create (ncopies
);
/* Constants and loop-invariant (external) values need a vector def built
   here; the same def is reused for every copy.  */
1236 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1238 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Prefer the caller-supplied VECTYPE; for a scalar boolean feeding a
   boolean vector use the matching truth type; otherwise derive the vector
   type from OP's scalar type.  */
1242 vector_type
= vectype
;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1245 vector_type
= truth_type_for (stmt_vectype
);
1247 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1249 gcc_assert (vector_type
);
1250 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1252 vec_oprnds
->quick_push (vop
);
/* Internal def: the defining stmt's vectorized copies already exist, so
   simply collect their LHS defs, one per copy.  */
1256 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1258 for (unsigned i
= 0; i
< ncopies
; ++i
)
1259 vec_oprnds
->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1265 /* Get vectorized definitions for OP0 and OP1. */
/* NOTE(review): despite the original comment naming only OP0/OP1, the
   visible signature handles up to four operands OP0..OP3, each with an
   optional required VECTYPE0..VECTYPE3.  The SLP/non-SLP selector lines
   and the per-operand null checks are missing from this extraction.  */
1268 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1270 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1271 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1272 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1273 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
/* SLP path: defs come from the SLP children, one child per operand.  */
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
/* Non-SLP path: compute NCOPIES defs per operand individually.  */
1289 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1290 op0
, vec_oprnds0
, vectype0
);
1292 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1293 op1
, vec_oprnds1
, vectype1
);
1295 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1296 op2
, vec_oprnds2
, vectype2
);
1298 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1299 op3
, vec_oprnds3
, vectype3
);
/* Convenience overload of vect_get_vec_defs: same as above but without
   per-operand vectype requirements -- forwards NULL_TREE for each vectype,
   letting the operand's own scalar type determine the vector type.  */
1304 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1306 tree op0
, vec
<tree
> *vec_oprnds0
,
1307 tree op1
, vec
<tree
> *vec_oprnds1
,
1308 tree op2
, vec
<tree
> *vec_oprnds2
,
1309 tree op3
, vec
<tree
> *vec_oprnds3
)
1311 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1312 op0
, vec_oprnds0
, NULL_TREE
,
1313 op1
, vec_oprnds1
, NULL_TREE
,
1314 op2
, vec_oprnds2
, NULL_TREE
,
1315 op3
, vec_oprnds3
, NULL_TREE
);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
/* NOTE(review): braces and the conditional guarding the location/EH
   propagation (visible only when STMT_INFO is non-null, presumably) are
   missing from this extraction -- confirm against the full source.  */
1323 vect_finish_stmt_generation_1 (vec_info
*,
1324 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
/* Carry the scalar stmt's source location over to the vector stmt.  */
1331 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1337 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1338 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
/* Without an originating scalar stmt the new stmt must not throw.  */
1341 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1349 vect_finish_replace_stmt (vec_info
*vinfo
,
1350 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
/* The replacement must define exactly the same LHS as the (original,
   pre-pattern) scalar stmt so downstream uses stay valid.  */
1352 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1353 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
/* Swap VEC_STMT in at the scalar stmt's position.  */
1355 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1356 gsi_replace (&gsi
, vec_stmt
, true);
/* Set location / EH info and record the new stmt.  */
1358 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1365 vect_finish_stmt_generation (vec_info
*vinfo
,
1366 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1367 gimple_stmt_iterator
*gsi
)
/* Labels cannot be vectorized statements.  */
1369 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
)
;
/* When inserting a stmt with memory operands before an existing stmt,
   wire up virtual operands manually to avoid a virtual-SSA update.  */
1371 if (!gsi_end_p (*gsi
)
1372 && gimple_has_mem_ops (vec_stmt
))
1374 gimple
*at_stmt
= gsi_stmt (*gsi
);
1375 tree vuse
= gimple_vuse (at_stmt
);
1376 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1378 tree vdef
= gimple_vdef (at_stmt
);
/* The new stmt reads the same virtual state the following stmt read.  */
1379 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1380 gimple_set_modified (vec_stmt
, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1386 && ((is_gimple_assign (vec_stmt
)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1388 || (is_gimple_call (vec_stmt
)
1389 && (!(gimple_call_flags (vec_stmt
)
1390 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
))
1391 || (gimple_call_lhs (vec_stmt
)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt
)))))))
/* VEC_STMT is a store: give it a fresh vdef and make the following
   stmt consume it instead of the old vuse.  */
1394 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1395 gimple_set_vdef (vec_stmt
, new_vdef
);
1396 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1400 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1401 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1410 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1411 tree vectype_out
, tree vectype_in
)
/* Map CFN (or, failing that, FNDECL) to an internal function code.  */
1414 if (internal_fn_p (cfn
))
1415 ifn
= as_internal_fn (cfn
);
1417 ifn
= associated_internal_fn (fndecl
);
1418 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1420 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1421 if (info
.vectorizable
)
/* type0/type1 describe which of the IFN's optab type slots come from
   the output type (negative index) vs. the input type.  */
1423 bool same_size_p
= TYPE_SIZE (vectype_in
) == TYPE_SIZE (vectype_out
);
1424 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1425 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1427 /* The type size of both the vectype_in and vectype_out should be
1428 exactly the same when vectype_out isn't participating the optab.
1429 While there is no restriction for type size when vectype_out
1430 is part of the optab query. */
1431 if (type0
!= vectype_out
&& type1
!= vectype_out
&& !same_size_p
)
/* Finally ask the target whether this IFN/type pair is supported.  */
1434 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1435 OPTIMIZE_FOR_SPEED
))
1443 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1444 gimple_stmt_iterator
*);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
/* NOTE(review): several structural lines (braces, early returns, some
   parameter lines and arguments) are missing from this extraction; the
   comments below cover only what the visible lines establish.  */
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1467 vec_load_store_type vls_type
,
1469 vect_memory_access_type
1471 gather_scatter_info
*gs_info
,
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type
== VMAT_INVARIANT
)
/* NVECTORS: number of vector stmts, from the SLP node if present,
   otherwise from the number of copies.  */
1478 unsigned int nvectors
;
1480 nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1482 nvectors
= vect_get_num_copies (loop_vinfo
, vectype
);
1484 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1485 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1486 machine_mode vecmode
= TYPE_MODE (vectype
);
1487 bool is_load
= (vls_type
== VLS_LOAD
);
/* Case 1: load/store-lanes.  Prefer a LEN-based control, then a
   mask-based one; otherwise partial vectors are unusable here.  */
1488 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1491 = (is_load
? vect_load_lanes_supported (vectype
, group_size
, true)
1492 : vect_store_lanes_supported (vectype
, group_size
, true));
1493 if (ifn
== IFN_MASK_LEN_LOAD_LANES
|| ifn
== IFN_MASK_LEN_STORE_LANES
)
1494 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1495 else if (ifn
== IFN_MASK_LOAD_LANES
|| ifn
== IFN_MASK_STORE_LANES
)
1496 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
/* Case 2: gather/scatter.  Again try the LEN variant first, then the
   plain masked variant.  */
1510 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1512 internal_fn ifn
= (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE
);
1515 internal_fn len_ifn
= (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE
);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn
, vectype
,
1519 gs_info
->memory_type
,
1520 gs_info
->offset_vectype
,
1522 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1524 gs_info
->memory_type
,
1525 gs_info
->offset_vectype
,
1527 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
/* Remaining access types must be contiguous (possibly with permute).  */
1541 if (memory_access_type
!= VMAT_CONTIGUOUS
1542 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
/* Emulated (non-vector-mode) operations cannot be partial either.  */
1554 if (!VECTOR_MODE_P (vecmode
))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
/* Round SIZE/NUNITS up to whole vectors.  */
1567 auto group_memory_nvectors
= [](poly_uint64 size
, poly_uint64 nunits
)
1569 unsigned int nvectors
;
1570 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1575 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1576 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1577 machine_mode mask_mode
;
/* Contiguous case: try a LEN-controlled load/store first, then a
   masked one.  */
1579 bool using_partial_vectors_p
= false;
1580 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
/* FACTOR accounts for a LEN mode measured in bytes rather than
   elements (VECMODE != VMODE).  */
1582 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1583 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1584 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1585 using_partial_vectors_p
= true;
1587 else if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1588 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1590 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1591 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1592 using_partial_vectors_p
= true;
/* Neither control style is available: give up on partial vectors.  */
1595 if (!using_partial_vectors_p
)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1606 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
/* NOTE(review): the early-return lines for the null-LOOP_MASK and
   already-masked cases are missing from this extraction.  */
1616 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1617 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1619 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1623 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
)
;
/* If this (VEC_MASK, LOOP_MASK) pair was already combined once, reuse
   that result rather than emitting another AND.  */
1625 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
/* Emit vec_mask_and = VEC_MASK & LOOP_MASK before *GSI.  */
1628 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1629 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1630 vec_mask
, loop_mask
);
1632 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1649 loop_vec_info loop_vinfo
, bool masked_p
,
1650 gather_scatter_info
*gs_info
)
/* Only compile-time-constant steps can be truncated safely.  */
1652 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1653 data_reference
*dr
= dr_info
->dr
;
1654 tree step
= DR_STEP (dr
);
1655 if (TREE_CODE (step
) != INTEGER_CST
)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE
, vect_location
,
1660 "cannot truncate variable step.\n");
1664 /* Get the number of bits in an element. */
1665 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1666 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1667 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1675 widest_int max_iters
;
1676 if (max_loop_iterations (loop
, &max_iters
)
1677 && max_iters
< count
)
1678 count
= max_iters
.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1682 wi::overflow_type overflow
= wi::OVF_NONE
;
1683 for (int i
= 0; i
< 2; ++i
)
1685 int scale
= scales
[i
];
/* The step must be an exact multiple of the scale for this scale to
   be usable; FACTOR is then STEP / SCALE.  */
1687 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1694 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1695 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1697 /* Find the narrowest viable offset type. */
/* Round the bit count up to a power of two for a standard-width type.  */
1698 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1699 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1706 vectype
, memory_type
, offset_type
, scale
,
1707 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1708 || gs_info
->ifn
== IFN_LAST
)
/* Success: fill in the gather/scatter description.  */
1711 gs_info
->decl
= NULL_TREE
;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info
->base
= NULL_TREE
;
1715 gs_info
->element_type
= TREE_TYPE (vectype
);
1716 gs_info
->offset
= fold_convert (offset_type
, step
);
1717 gs_info
->offset_dt
= vect_constant_def
;
1718 gs_info
->scale
= scale
;
1719 gs_info
->memory_type
= memory_type
;
/* Fell out of the scale loop without success; note a potential
   overflow in the precision computation when dumping.  */
1723 if (overflow
&& dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE
, vect_location
,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits
);
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1739 loop_vec_info loop_vinfo
, bool masked_p
,
1740 gather_scatter_info
*gs_info
)
/* If no natively-supported gather/scatter IFN exists for this access,
   fall back to trying a truncated-offset form.  */
1742 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1743 || gs_info
->ifn
== IFN_LAST
)
1744 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
/* Widen the recorded offset to the element type of the offset vector
   the target expects; widening is always safe, hence the assert.  */
1747 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1748 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1750 gcc_assert (TYPE_PRECISION (new_offset_type
)
1751 >= TYPE_PRECISION (old_offset_type
));
1752 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE
, vect_location
,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info
->scale
);
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1767 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
/* Compare the DR's effective step (inner-loop behavior where relevant)
   against zero; the second argument line is missing from this extraction
   (presumably size_zero_node -- confirm against the full source).  */
1769 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
1778 perm_mask_for_reverse (tree vectype
)
1780 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1782 /* The encoding has a single stepped pattern. */
/* Three leading elements {N-1, N-2, N-3} encode the full descending
   series for a variable-length vector.  */
1783 vec_perm_builder
sel (nunits
, 1, 3);
1784 for (int i
= 0; i
< 3; ++i
)
1785 sel
.quick_push (nunits
- 1 - i
);
/* Only return the mask if the target can actually perform the permute.  */
1787 vec_perm_indices
indices (sel
, 1, nunits
);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
1791 return vect_gen_perm_mask_checked (vectype
, indices
);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
1800 get_negative_load_store_type (vec_info
*vinfo
,
1801 stmt_vec_info stmt_info
, tree vectype
,
1802 vec_load_store_type vls_type
,
1803 unsigned int ncopies
, poly_int64
*poffset
)
1805 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1806 dr_alignment_support alignment_support_scheme
;
/* NOTE(review): the guard preceding this dump (presumably ncopies > 1 --
   confirm) is missing from this extraction.  */
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE
;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
/* Negative-step accesses require aligned or supported-unaligned DRs.  */
1821 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
1824 if (alignment_support_scheme
!= dr_aligned
1825 && alignment_support_scheme
!= dr_unaligned_supported
)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1829 "negative step but alignment required.\n");
1831 return VMAT_ELEMENTWISE
;
/* An invariant source needs no element reversal when stored downward.  */
1834 if (vls_type
== VLS_STORE_INVARIANT
)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE
, vect_location
,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN
;
/* Otherwise a reversing permute must be available.  */
1843 if (!perm_mask_for_reverse (vectype
))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1847 "negative step and reversing not supported.\n");
1849 return VMAT_ELEMENTWISE
;
1852 return VMAT_CONTIGUOUS_REVERSE
;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
1859 vect_get_store_rhs (stmt_vec_info stmt_info
)
/* Plain assignment store: the stored value is RHS1.  */
1861 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1863 gcc_assert (gimple_assign_single_p (assign
));
1864 return gimple_assign_rhs1 (assign
);
/* Internal-function store (e.g. a masked store): the stored value sits
   at the IFN-specific argument index.  */
1866 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1868 internal_fn ifn
= gimple_call_internal_fn (call
);
1869 int index
= internal_fn_stored_value_index (ifn
);
1870 gcc_assert (index
>= 0);
1871 return gimple_call_arg (call
, index
);
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed with NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
1880 same vector size as the return vector. It checks target whether supports
1881 pieces-size vector mode for construction firstly, if target fails to, check
1882 pieces-size scalar mode for construction further. It returns NULL_TREE if
1883 fails to find the available composition.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1891 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
1893 gcc_assert (VECTOR_TYPE_P (vtype
));
1894 gcc_assert (known_gt (nelts
, 0U));
/* Non-vector (emulated) modes cannot be composed this way.  */
1896 machine_mode vmode
= TYPE_MODE (vtype
);
1897 if (!VECTOR_MODE_P (vmode
))
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype
), nelts
))
1904 *ptype
= TREE_TYPE (vtype
);
/* PBSIZE is the bit size of each of the NELTS pieces; it must divide
   the whole vector size exactly.  */
1908 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
1909 unsigned int pbsize
;
1910 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
1915 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
1917 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
1918 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
1919 != CODE_FOR_nothing
))
1921 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
1925 /* Otherwise check if exists an integer type of the same piece size and
1926 if vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
1928 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
1929 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
1930 != CODE_FOR_nothing
))
/* Compose an NELTS-element vector of PBSIZE-bit integers instead.  */
1932 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
1933 return build_vector_type (*ptype
, nelts
);
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1950 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
1951 tree vectype
, slp_tree slp_node
,
1952 bool masked_p
, vec_load_store_type vls_type
,
1953 vect_memory_access_type
*memory_access_type
,
1954 poly_int64
*poffset
,
1955 dr_alignment_support
*alignment_support_scheme
,
1957 gather_scatter_info
*gs_info
,
1958 internal_fn
*lanes_ifn
)
1960 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1961 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1962 stmt_vec_info first_stmt_info
;
1963 unsigned int group_size
;
1964 unsigned HOST_WIDE_INT gap
;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1967 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1968 group_size
= DR_GROUP_SIZE (first_stmt_info
);
1969 gap
= DR_GROUP_GAP (first_stmt_info
);
1973 first_stmt_info
= stmt_info
;
1977 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
1978 bool single_element_p
= (stmt_info
== first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
1980 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p
= false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p
= (!masked_p
1989 && vls_type
== VLS_LOAD
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits
, group_size
))
2013 *memory_access_type
= VMAT_STRIDED_SLP
;
2015 *memory_access_type
= VMAT_ELEMENTWISE
;
2019 overrun_p
= loop_vinfo
&& gap
!= 0;
2020 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2032 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2034 / vect_get_scalar_dr_size (first_dr_info
)))
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss
;
2042 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2046 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2047 vectype
, misalign
)))
2049 || alss
== dr_unaligned_supported
)
2050 && known_eq (nunits
, (group_size
- gap
) * 2)
2051 && known_eq (nunits
, group_size
)
2052 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2056 if (overrun_p
&& !can_overrun_p
)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2060 "Peeling for outer loop is not supported\n");
2063 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2066 if (single_element_p
)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type
= get_negative_load_store_type
2070 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits
, group_size
))
2077 *memory_access_type
= VMAT_STRIDED_SLP
;
2079 *memory_access_type
= VMAT_ELEMENTWISE
;
2082 else if (cmp
== 0 && loop_vinfo
)
2084 gcc_assert (vls_type
== VLS_LOAD
);
2085 *memory_access_type
= VMAT_INVARIANT
;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme
= dr_unaligned_supported
;
2091 *memory_access_type
= VMAT_CONTIGUOUS
;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2102 && *memory_access_type
== VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2104 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2107 unsigned HOST_WIDE_INT cnunits
, cvf
;
2109 || !nunits
.is_constant (&cnunits
)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size
* cvf
) % cnunits
+ group_size
- gap
< cnunits
)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2120 "peeling for gaps insufficient for "
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type
= VMAT_ELEMENTWISE
;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p
= (gap
!= 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2143 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2144 / vect_get_scalar_dr_size (first_dr_info
)))
2145 would_overrun_p
= false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2148 && (can_overrun_p
|| !would_overrun_p
)
2149 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2151 /* First cope with the degenerate case of a single-element
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2158 /* Otherwise try using LOAD/STORE_LANES. */
2160 = vls_type
== VLS_LOAD
2161 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2162 : vect_store_lanes_supported (vectype
, group_size
,
2164 if (*lanes_ifn
!= IFN_LAST
)
2166 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2167 overrun_p
= would_overrun_p
;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type
== VLS_LOAD
2172 ? vect_grouped_load_supported (vectype
,
2175 : vect_grouped_store_supported (vectype
, group_size
))
2177 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2178 overrun_p
= would_overrun_p
;
2183 /* As a last resort, trying using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type
== VMAT_ELEMENTWISE
2193 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2195 *memory_access_type
= VMAT_GATHER_SCATTER
;
2198 if (*memory_access_type
== VMAT_GATHER_SCATTER
2199 || *memory_access_type
== VMAT_ELEMENTWISE
)
2201 *alignment_support_scheme
= dr_unaligned_supported
;
2202 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2206 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2212 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2217 while (next_stmt_info
)
2219 tree op
= vect_get_store_rhs (next_stmt_info
);
2220 enum vect_def_type dt
;
2221 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2225 "use not simple.\n");
2228 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2234 gcc_assert (can_overrun_p
);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2237 "Data access with gaps requires scalar "
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
/* NOTE(review): extraction-garbled fragment of GCC's vectorizer
   (tree-vect-stmts).  Logical source lines are split into pieces and
   some original lines (braces, returns) are missing -- verify against
   the upstream file before any functional change.  Code is kept
   byte-identical below; only comments were added.
   Visible intent: classify a load/store STMT_INFO and record the
   chosen vect_memory_access_type plus alignment/misalignment info in
   the output parameters.  */
2260 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2261 tree vectype
, slp_tree slp_node
,
2262 bool masked_p
, vec_load_store_type vls_type
,
2263 unsigned int ncopies
,
2264 vect_memory_access_type
*memory_access_type
,
2265 poly_int64
*poffset
,
2266 dr_alignment_support
*alignment_support_scheme
,
2268 gather_scatter_info
*gs_info
,
2269 internal_fn
*lanes_ifn
)
2271 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2272 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2273 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
/* Case: explicit gather/scatter access -- validated by
   vect_check_gather_scatter and the offset checks below.  */
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2277 *memory_access_type
= VMAT_GATHER_SCATTER
;
2278 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info
->ifn
!= IFN_LAST
2287 && !is_gimple_call (stmt_info
->stmt
)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2289 TREE_TYPE (gs_info
->offset_vectype
)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2293 "%s offset requires a conversion\n",
2294 vls_type
== VLS_LOAD
? "gather" : "scatter");
2297 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2298 &gs_info
->offset_dt
,
2299 &gs_info
->offset_vectype
))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2303 "%s index use not simple.\n",
2304 vls_type
== VLS_LOAD
? "gather" : "scatter");
/* Emulated gather/scatter (no ifn, no target decl): lane counts must
   be constant and the offset lanes a constant multiple of the data
   lanes.  */
2307 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info
->offset_vectype
),
2313 TYPE_VECTOR_SUBPARTS (vectype
)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2317 "unsupported vector types for emulated "
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme
= dr_unaligned_supported
;
/* Case: grouped access (or SLP) -- delegated to the group helper.  */
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) || slp_node
)
2328 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2330 vls_type
, memory_access_type
, poffset
,
2331 alignment_support_scheme
,
2332 misalignment
, gs_info
, lanes_ifn
))
/* Case: strided access -- strided gather/scatter when usable,
   otherwise elementwise (alignment then irrelevant).  */
2335 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2337 gcc_assert (!slp_node
);
2339 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2341 *memory_access_type
= VMAT_GATHER_SCATTER
;
2343 *memory_access_type
= VMAT_ELEMENTWISE
;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme
= dr_unaligned_supported
;
/* Case: single non-grouped access -- invariant (step == 0, loads
   only), negative step, or plain contiguous.  */
2349 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2352 gcc_assert (vls_type
== VLS_LOAD
);
2353 *memory_access_type
= VMAT_INVARIANT
;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme
= dr_unaligned_supported
;
2361 *memory_access_type
= get_negative_load_store_type
2362 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2364 *memory_access_type
= VMAT_CONTIGUOUS
;
2365 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo
,
2369 STMT_VINFO_DR_INFO (stmt_info
),
2370 vectype
, *misalignment
);
/* Elementwise / strided-SLP accesses need a constant number of lanes
   (see the dump message): reject them under a variable VF.  */
2374 if ((*memory_access_type
== VMAT_ELEMENTWISE
2375 || *memory_access_type
== VMAT_STRIDED_SLP
)
2376 && !nunits
.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2385 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2389 "unsupported unaligned access\n");
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2397 if (!first_stmt_info
)
2398 first_stmt_info
= stmt_info
;
2399 if (*memory_access_type
== VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2401 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2407 "not falling back to elementwise accesses\n");
2413 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
/* NOTE(review): extraction-garbled fragment -- lines are split and some
   original lines (braces, returns) are missing; code kept
   byte-identical, comments only.
   Visible intent: validate the scalar mask operand of a conditional
   operation and return it plus its def-type / vector mask type through
   the out-parameters (see the block comment above this function).  */
2420 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2421 slp_tree slp_node
, unsigned mask_index
,
2422 tree
*mask
, slp_tree
*mask_node
,
2423 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2425 enum vect_def_type mask_dt
;
2427 slp_tree mask_node_1
;
/* The mask operand must have a simple (vectorizable) definition.  */
2428 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2429 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "mask use not simple.\n");
/* The scalar mask must be of boolean kind.  */
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2441 "mask argument is not a boolean.\n");
2445 /* If the caller is not prepared for adjusting an external/constant
2446 SLP mask vector type fail. */
2449 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2453 "SLP mask argument is not vectorized.\n");
/* Derive a vector boolean type from the statement's data vectype and
   check it matches in number of lanes.  */
2457 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2459 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
),
2462 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2466 "could not find an appropriate vector mask type.\n");
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2471 TYPE_VECTOR_SUBPARTS (vectype
)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype
, vectype
);
/* Success: publish the results through the out-parameters.  */
2482 *mask_dt_out
= mask_dt
;
2483 *mask_vectype_out
= mask_vectype
;
2485 *mask_node
= mask_node_1
;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
/* NOTE(review): extraction-garbled fragment -- lines are split and some
   original lines are missing; code kept byte-identical, comments only.
   Visible intent: validate the stored value of a store statement and
   classify the store as invariant or not (see the block comment above
   this function).  */
2496 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2497 slp_tree slp_node
, tree
*rhs
, slp_tree
*rhs_node
,
2498 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2499 vec_load_store_type
*vls_type_out
)
/* For internal store functions the stored value sits at a
   function-specific argument index.  */
2502 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2504 if (gimple_call_internal_p (call
)
2505 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2506 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2509 op_no
= vect_slp_child_index_for_operand
2510 (stmt_info
->stmt
, op_no
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
2512 enum vect_def_type rhs_dt
;
2514 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2515 rhs
, rhs_node
, &rhs_dt
, &rhs_vectype
))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "use not simple.\n");
2523 /* In the case this is a store from a constant make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs
) && native_encode_expr (*rhs
, NULL
, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2529 "cannot encode constant as a byte sequence.\n");
/* The RHS vector type, if known, must be compatible with the
   statement's vectype.  */
2533 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2534 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2538 "incompatible vector types.\n");
/* Success: publish def-type / vectype and classify invariant stores
   (constant or external definitions).  */
2542 *rhs_dt_out
= rhs_dt
;
2543 *rhs_vectype_out
= rhs_vectype
;
2544 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2545 *vls_type_out
= VLS_STORE_INVARIANT
;
2547 *vls_type_out
= VLS_STORE
;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
/* NOTE(review): extraction-garbled fragment -- lines are split; the
   declarations of `r'/`tmp' used by the float branch (orig. lines
   2569-2571) are missing from this view.  Code kept byte-identical,
   comments only.
   Visible intent: build an all-ones mask of MASKTYPE (integer, vector
   boolean/integer, or float-bit-pattern vector; see the block comment
   above this function).  */
2556 vect_build_all_ones_mask (vec_info
*vinfo
,
2557 stmt_vec_info stmt_info
, tree masktype
)
/* Scalar integer mask: a plain -1 constant suffices.  */
2559 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2560 return build_int_cst (masktype
, -1);
/* Boolean or integer-element vector mask: splat -1 into a vector.  */
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype
)
2562 || TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2564 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2565 mask
= build_vector_from_val (masktype
, mask
);
2566 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
/* Float-element mask: reinterpret an all-ones bit pattern as a real
   value and splat it (floats act as a bitmask here).  */
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2572 for (int j
= 0; j
< 6; ++j
)
2574 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2575 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2576 mask
= build_vector_from_val (masktype
, mask
);
2577 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
/* NOTE(review): extraction-garbled fragment -- lines are split; the
   declarations of `merge'/`r'/`tmp' (orig. lines 2588-2589, 2593-2595)
   are missing from this view.  Code kept byte-identical, comments only.
   Visible intent: build an all-zero merge vector of VECTYPE for a
   gather load (see the block comment above this function).  */
2586 vect_build_zero_merge_argument (vec_info
*vinfo
,
2587 stmt_vec_info stmt_info
, tree vectype
)
/* Integer elements: zero constant.  */
2590 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2591 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
/* Float elements: construct the zero bit pattern via real_from_target.  */
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2596 for (int j
= 0; j
< 6; ++j
)
2598 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2599 merge
= build_real (TREE_TYPE (vectype
), r
);
/* Splat the scalar zero into a vector and materialize it.  */
2603 merge
= build_vector_from_val (vectype
, merge
);
2604 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
/* NOTE(review): extraction-garbled fragment -- lines are split and
   several original lines (declarations of `op'/`var', branch bodies)
   are missing from this view; code kept byte-identical, comments only.
   Visible intent: emit one call to a target gather-load builtin
   (gs_info->decl), converting index/mask/result types as the builtin's
   prototype requires (see the block comment above this function).  */
2614 vect_build_one_gather_load_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2615 gimple_stmt_iterator
*gsi
,
2616 gather_scatter_info
*gs_info
,
2617 tree ptr
, tree offset
, tree mask
)
2619 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* Decompose the builtin's prototype: src, (ptr), index, mask, scale.  */
2620 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2621 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2622 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2623 /* ptrtype */ arglist
= TREE_CHAIN (arglist
);
2624 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2625 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2626 tree scaletype
= TREE_VALUE (arglist
);
2628 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2630 || TREE_CODE (masktype
) == INTEGER_TYPE
2631 || types_compatible_p (srctype
, masktype
)));
/* Reinterpret the offset to the builtin's index type if needed
   (same lane count is asserted).  */
2634 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2637 TYPE_VECTOR_SUBPARTS (idxtype
)));
2638 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2639 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2640 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2641 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2645 tree src_op
= NULL_TREE
;
2646 tree mask_op
= NULL_TREE
;
/* If MASK's type differs from the builtin's mask type, reinterpret it
   (VIEW_CONVERT via a same-mode type) and widen with a NOP conversion
   when precisions differ.  */
2649 if (!useless_type_conversion_p (masktype
, TREE_TYPE (mask
)))
2651 tree utype
, optype
= TREE_TYPE (mask
);
2652 if (VECTOR_TYPE_P (masktype
)
2653 || TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2656 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2657 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2658 tree mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask
);
2660 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2661 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2663 if (!useless_type_conversion_p (masktype
, utype
))
2665 gcc_assert (TYPE_PRECISION (utype
)
2666 <= TYPE_PRECISION (masktype
));
2667 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2668 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2669 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2672 src_op
= build_zero_cst (srctype
);
/* Unmasked case (presumably; the controlling condition is not visible
   here): zero merge value and all-ones mask.  */
2683 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2684 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2687 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2688 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
/* If the builtin returns a different vector type, VIEW_CONVERT the
   call result back to VECTYPE (same lane count asserted).  */
2691 if (!useless_type_conversion_p (vectype
, rettype
))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2694 TYPE_VECTOR_SUBPARTS (rettype
)));
2695 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2696 gimple_call_set_lhs (new_stmt
, op
);
2697 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2698 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2699 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
, op
);
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2707 the scatter store operation. If the store is conditional, MASK is the
2708 unvectorized condition, otherwise MASK is null. */
/* NOTE(review): extraction-garbled fragment -- lines are split and many
   original lines (braces, `modifier' assignments, some declarations)
   are missing from this view; code kept byte-identical, comments only.
   Visible intent: vectorize a scatter store via calls to the target
   builtin gs_info->decl, handling lane-count mismatches between data
   and offset vectors (see the block comment above this function).  */
2711 vect_build_scatter_store_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2712 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2713 gather_scatter_info
*gs_info
, tree mask
,
2714 stmt_vector_for_cost
*cost_vec
)
2716 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2717 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2718 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2719 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2720 enum { NARROW
, NONE
, WIDEN
} modifier
;
2721 poly_uint64 scatter_off_nunits
2722 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2724 /* FIXME: Keep the previous costing way in vect_model_store_cost by
2725 costing N scalar stores, but it should be tweaked to use target
2726 specific costs on related scatter store calls. */
2729 tree op
= vect_get_store_rhs (stmt_info
);
2730 enum vect_def_type dt
;
2731 gcc_assert (vect_is_simple_use (op
, vinfo
, &dt
));
2732 unsigned int inside_cost
, prologue_cost
= 0;
2733 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2734 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
2735 stmt_info
, 0, vect_prologue
);
2736 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
2737 inside_cost
= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
2738 scalar_store
, stmt_info
, 0, vect_body
);
2740 if (dump_enabled_p ())
2741 dump_printf_loc (MSG_NOTE
, vect_location
,
2742 "vect_model_store_cost: inside_cost = %d, "
2743 "prologue_cost = %d .\n",
2744 inside_cost
, prologue_cost
);
/* Compare data vs. offset lane counts (NONE / WIDEN / NARROW handling;
   the `modifier' assignments are not visible in this fragment) and
   build the permutation mask used in the main loop below.  */
2748 tree perm_mask
= NULL_TREE
, mask_halfvectype
= NULL_TREE
;
2749 if (known_eq (nunits
, scatter_off_nunits
))
2751 else if (known_eq (nunits
* 2, scatter_off_nunits
))
2755 /* Currently gathers and scatters are only supported for
2756 fixed-length vectors. */
2757 unsigned int count
= scatter_off_nunits
.to_constant ();
2758 vec_perm_builder
sel (count
, count
, 1);
2759 for (unsigned i
= 0; i
< (unsigned int) count
; ++i
)
2760 sel
.quick_push (i
| (count
/ 2));
2762 vec_perm_indices
indices (sel
, 1, count
);
2763 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
, indices
);
2764 gcc_assert (perm_mask
!= NULL_TREE
);
2766 else if (known_eq (nunits
, scatter_off_nunits
* 2))
2770 /* Currently gathers and scatters are only supported for
2771 fixed-length vectors. */
2772 unsigned int count
= nunits
.to_constant ();
2773 vec_perm_builder
sel (count
, count
, 1);
2774 for (unsigned i
= 0; i
< (unsigned int) count
; ++i
)
2775 sel
.quick_push (i
| (count
/ 2));
2777 vec_perm_indices
indices (sel
, 2, count
);
2778 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2779 gcc_assert (perm_mask
!= NULL_TREE
);
2783 mask_halfvectype
= truth_type_for (gs_info
->offset_vectype
);
/* Decompose the builtin's prototype: ptr, mask, index, src, scale.  */
2788 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2789 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2790 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2791 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2792 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2793 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2794 tree scaletype
= TREE_VALUE (arglist
);
2796 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
2797 && TREE_CODE (rettype
) == VOID_TYPE
);
/* Materialize the loop-invariant base pointer on the preheader edge.  */
2799 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2800 if (!is_gimple_min_invariant (ptr
))
2803 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2804 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2805 edge pe
= loop_preheader_edge (loop
);
2806 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2807 gcc_assert (!new_bb
);
/* With no scalar mask, use an all-ones mask constant.  */
2810 tree mask_arg
= NULL_TREE
;
2811 if (mask
== NULL_TREE
)
2813 mask_arg
= build_int_cst (masktype
, -1);
2814 mask_arg
= vect_init_vector (vinfo
, stmt_info
, mask_arg
, masktype
, NULL
);
2817 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
/* Collect the vector defs for mask, offset and stored value; the
   copy counts depend on the NARROW/WIDEN modifier.  */
2819 auto_vec
<tree
> vec_oprnds0
;
2820 auto_vec
<tree
> vec_oprnds1
;
2821 auto_vec
<tree
> vec_masks
;
2824 tree mask_vectype
= truth_type_for (vectype
);
2825 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2826 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2827 mask
, &vec_masks
, mask_vectype
);
2829 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2830 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2831 gs_info
->offset
, &vec_oprnds0
);
2832 tree op
= vect_get_store_rhs (stmt_info
);
2833 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2834 modifier
== NARROW
? ncopies
/ 2 : ncopies
, op
,
2837 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2838 tree mask_op
= NULL_TREE
;
/* Main loop: emit one scatter-store call per copy.  WIDEN reuses each
   offset for two copies (permuting its upper half); NARROW does the
   same for the stored value.  */
2840 for (int j
= 0; j
< ncopies
; ++j
)
2842 if (modifier
== WIDEN
)
2845 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
, perm_mask
,
2848 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
2849 src
= vec_oprnd1
= vec_oprnds1
[j
];
2851 mask_op
= vec_mask
= vec_masks
[j
];
2853 else if (modifier
== NARROW
)
2856 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
2857 perm_mask
, stmt_info
, gsi
);
2859 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
2860 op
= vec_oprnd0
= vec_oprnds0
[j
];
2862 mask_op
= vec_mask
= vec_masks
[j
/ 2];
2866 op
= vec_oprnd0
= vec_oprnds0
[j
];
2867 src
= vec_oprnd1
= vec_oprnds1
[j
];
2869 mask_op
= vec_mask
= vec_masks
[j
];
/* Reinterpret the stored value to the builtin's source type.  */
2872 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
2874 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
2875 TYPE_VECTOR_SUBPARTS (srctype
)));
2876 tree var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
2877 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
2878 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
2879 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Reinterpret the offset to the builtin's index type.  */
2883 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2885 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2886 TYPE_VECTOR_SUBPARTS (idxtype
)));
2887 tree var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2888 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2889 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2890 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* For NARROW, unpack the half of the mask vector matching this copy.  */
2898 if (modifier
== NARROW
)
2901 = vect_get_new_ssa_name (mask_halfvectype
, vect_simple_var
);
2903 = gimple_build_assign (var
,
2904 (j
& 1) ? VEC_UNPACK_HI_EXPR
2905 : VEC_UNPACK_LO_EXPR
,
2907 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Convert the mask argument to the builtin's (integer) mask type,
   mirroring the gather-load mask handling.  */
2910 tree optype
= TREE_TYPE (mask_arg
);
2911 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2914 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2915 tree var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2916 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
2918 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2919 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2921 if (!useless_type_conversion_p (masktype
, utype
))
2923 gcc_assert (TYPE_PRECISION (utype
) <= TYPE_PRECISION (masktype
));
2924 tree var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2925 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2926 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Emit the scatter-store builtin call for this copy.  */
2932 = gimple_build_call (gs_info
->decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
2933 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2935 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2937 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2940 /* Prepare the base and offset in GS_INFO for vectorization.
2941 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2942 to the vectorized offset argument for the first copy of STMT_INFO.
2943 STMT_INFO is the statement described by GS_INFO and LOOP is the
/* NOTE(review): extraction-garbled fragment -- lines are split and some
   original lines (the SLP/non-SLP branch structure, `ncopies'
   declaration) are missing from this view.  Code kept byte-identical,
   comments only.
   Visible intent: prepare the invariant base pointer and the
   vectorized offsets for a gather/scatter (see the block comment above
   this function).  */
2947 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2948 class loop
*loop
, stmt_vec_info stmt_info
,
2949 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2950 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
/* Gimplify the base address; any generated statements go on the
   loop preheader edge so the base stays loop-invariant.  */
2952 gimple_seq stmts
= NULL
;
2953 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2957 edge pe
= loop_preheader_edge (loop
);
2958 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2959 gcc_assert (!new_bb
);
/* SLP path: offsets come from the first SLP child.  */
2962 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
/* Non-SLP path: build the offset defs from gs_info->offset.  */
2966 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2967 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2968 gs_info
->offset
, vec_offset
,
2969 gs_info
->offset_vectype
);
2973 /* Prepare to implement a grouped or strided load or store using
2974 the gather load or scatter store operation described by GS_INFO.
2975 STMT_INFO is the load or store statement.
2977 Set *DATAREF_BUMP to the amount that should be added to the base
2978 address after each copy of the vectorized statement. Set *VEC_OFFSET
2979 to an invariant offset vector in which element I has the value
2980 I * DR_STEP / SCALE. */
/* NOTE(review): extraction-garbled fragment -- lines are split and some
   original lines (`else' branch structure, some temporaries) are
   missing from this view.  Code kept byte-identical, comments only.
   Visible intent: compute DATAREF_BUMP (per-copy pointer increment)
   and VEC_OFFSET ({0, X, 2X, ...} series) for a strided access done
   via gather/scatter (see the block comment above this function).  */
2983 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2984 loop_vec_info loop_vinfo
,
2985 gimple_stmt_iterator
*gsi
,
2986 gather_scatter_info
*gs_info
,
2987 tree
*dataref_bump
, tree
*vec_offset
,
2988 vec_loop_lens
*loop_lens
)
2990 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2991 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* With .SELECT_VL the bump is the dynamic length times the step, so
   it must be computed inside the loop at GSI.  */
2993 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2995 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2996 ivtmp_8 = _31 * 16 (step in bytes);
2997 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2998 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3000 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, vectype
, 0, 0);
3002 = fold_build2 (MULT_EXPR
, sizetype
,
3003 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3005 *dataref_bump
= force_gimple_operand_gsi (gsi
, tmp
, true, NULL_TREE
, true,
/* Otherwise the bump is the constant DR_STEP * nunits, hoisted to the
   preheader.  */
3011 = size_binop (MULT_EXPR
,
3012 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3013 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3014 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3017 /* The offset given in GS_INFO can have pointer type, so use the element
3018 type of the vector instead. */
3019 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3021 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3022 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3023 ssize_int (gs_info
->scale
));
3024 step
= fold_convert (offset_type
, step
);
3026 /* Create {0, X, X*2, X*3, ...}. */
3027 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3028 build_zero_cst (offset_type
), step
);
3029 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
3032 /* Prepare the pointer IVs which needs to be updated by a variable amount.
3033 Such variable amount is the outcome of .SELECT_VL. In this case, we can
3034 allow each iteration process the flexible number of elements as long as
3035 the number <= vf elments.
3037 Return data reference according to SELECT_VL.
3038 If new statements are needed, insert them before GSI. */
/* NOTE(review): extraction-garbled fragment -- lines are split; the
   `loop_len' declaration (orig. line ~3061) and the final `return'
   are missing from this view.  Code kept byte-identical, comments
   only.
   Visible intent: build the variable pointer bump
   .SELECT_VL-result * step, emitted before GSI (see the block comment
   above this function).  */
3041 vect_get_loop_variant_data_ptr_increment (
3042 vec_info
*vinfo
, tree aggr_type
, gimple_stmt_iterator
*gsi
,
3043 vec_loop_lens
*loop_lens
, dr_vec_info
*dr_info
,
3044 vect_memory_access_type memory_access_type
)
3046 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3047 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3049 /* gather/scatter never reach here. */
3050 gcc_assert (memory_access_type
!= VMAT_GATHER_SCATTER
);
3052 /* When we support SELECT_VL pattern, we dynamic adjust
3053 the memory address by .SELECT_VL result.
3055 The result of .SELECT_VL is the number of elements to
3056 be processed of each iteration. So the memory address
3057 adjustment operation should be:
3059 addr = addr + .SELECT_VL (ARG..) * step;
3062 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, aggr_type
, 0, 0);
3063 tree len_type
= TREE_TYPE (loop_len
);
3064 /* Since the outcome of .SELECT_VL is element size, we should adjust
3065 it into bytesize so that it can be used in address pointer variable
3066 amount IVs adjustment. */
3067 tree tmp
= fold_build2 (MULT_EXPR
, len_type
, loop_len
,
3068 wide_int_to_tree (len_type
, wi::to_widest (step
)));
/* Materialize the bump as an SSA temp inserted before GSI.  */
3069 tree bump
= make_temp_ssa_name (len_type
, NULL
, "ivtmp");
3070 gassign
*assign
= gimple_build_assign (bump
, tmp
);
3071 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
3075 /* Return the amount that should be added to a vector pointer to move
3076 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3077 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3081 vect_get_data_ptr_increment (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
3082 dr_vec_info
*dr_info
, tree aggr_type
,
3083 vect_memory_access_type memory_access_type
,
3084 vec_loop_lens
*loop_lens
= nullptr)
3086 if (memory_access_type
== VMAT_INVARIANT
)
3087 return size_zero_node
;
3089 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3090 if (loop_vinfo
&& LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
3091 return vect_get_loop_variant_data_ptr_increment (vinfo
, aggr_type
, gsi
,
3093 memory_access_type
);
3095 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3096 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3097 if (tree_int_cst_sgn (step
) == -1)
3098 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3102 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3105 vectorizable_bswap (vec_info
*vinfo
,
3106 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3107 gimple
**vec_stmt
, slp_tree slp_node
,
3109 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3112 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3113 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3116 op
= gimple_call_arg (stmt
, 0);
3117 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3118 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3120 /* Multiple types in SLP are handled by creating the appropriate number of
3121 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3126 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3128 gcc_assert (ncopies
>= 1);
3130 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3134 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3135 unsigned word_bytes
;
3136 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3139 /* The encoding uses one stepped pattern for each byte in the word. */
3140 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3141 for (unsigned i
= 0; i
< 3; ++i
)
3142 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3143 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3145 vec_perm_indices
indices (elts
, 1, num_bytes
);
3146 machine_mode vmode
= TYPE_MODE (char_vectype
);
3147 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3153 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3155 if (dump_enabled_p ())
3156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3157 "incompatible vector types for invariants\n");
3161 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3162 DUMP_VECT_SCOPE ("vectorizable_bswap");
3163 record_stmt_cost (cost_vec
,
3164 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3165 record_stmt_cost (cost_vec
,
3167 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3168 vec_perm
, stmt_info
, 0, vect_body
);
3172 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3175 vec
<tree
> vec_oprnds
= vNULL
;
3176 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3178 /* Arguments are ready. create the new vector stmt. */
3181 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3184 tree tem
= make_ssa_name (char_vectype
);
3185 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3186 char_vectype
, vop
));
3187 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3188 tree tem2
= make_ssa_name (char_vectype
);
3189 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3190 tem
, tem
, bswap_vconst
);
3191 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3192 tem
= make_ssa_name (vectype
);
3193 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3195 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3197 slp_node
->push_vec_def (new_stmt
);
3199 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3203 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3205 vec_oprnds
.release ();
3209 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3210 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3211 in a single step. On success, store the binary pack code in
3215 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3216 code_helper
*convert_code
)
3218 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3219 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3223 int multi_step_cvt
= 0;
3224 auto_vec
<tree
, 8> interm_types
;
3225 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3226 &code
, &multi_step_cvt
, &interm_types
)
3230 *convert_code
= code
;
3234 /* Function vectorizable_call.
3236 Check if STMT_INFO performs a function call that can be vectorized.
3237 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3238 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3239 Return true if STMT_INFO is vectorizable in this way. */
3242 vectorizable_call (vec_info
*vinfo
,
3243 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3244 gimple
**vec_stmt
, slp_tree slp_node
,
3245 stmt_vector_for_cost
*cost_vec
)
3251 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3252 tree vectype_out
, vectype_in
;
3253 poly_uint64 nunits_in
;
3254 poly_uint64 nunits_out
;
3255 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3256 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3257 tree fndecl
, new_temp
, rhs_type
;
3258 enum vect_def_type dt
[4]
3259 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3260 vect_unknown_def_type
};
3261 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3262 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3263 int ndts
= ARRAY_SIZE (dt
);
3265 auto_vec
<tree
, 8> vargs
;
3266 enum { NARROW
, NONE
, WIDEN
} modifier
;
3270 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3273 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3277 /* Is STMT_INFO a vectorizable call? */
3278 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3282 if (gimple_call_internal_p (stmt
)
3283 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3284 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3285 /* Handled by vectorizable_load and vectorizable_store. */
3288 if (gimple_call_lhs (stmt
) == NULL_TREE
3289 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3292 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3294 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3296 /* Process function arguments. */
3297 rhs_type
= NULL_TREE
;
3298 vectype_in
= NULL_TREE
;
3299 nargs
= gimple_call_num_args (stmt
);
3301 /* Bail out if the function has more than four arguments, we do not have
3302 interesting builtin functions to vectorize with more than two arguments
3303 except for fma. No arguments is also not good. */
3304 if (nargs
== 0 || nargs
> 4)
3307 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3308 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3309 if (cfn
== CFN_GOMP_SIMD_LANE
)
3312 rhs_type
= unsigned_type_node
;
3316 if (internal_fn_p (cfn
))
3317 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3319 for (i
= 0; i
< nargs
; i
++)
3321 if ((int) i
== mask_opno
)
3323 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3324 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3329 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3330 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3334 "use not simple.\n");
3338 /* We can only handle calls with arguments of the same type. */
3340 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3344 "argument types differ.\n");
3348 rhs_type
= TREE_TYPE (op
);
3351 vectype_in
= vectypes
[i
];
3352 else if (vectypes
[i
]
3353 && !types_compatible_p (vectypes
[i
], vectype_in
))
3355 if (dump_enabled_p ())
3356 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3357 "argument vector types differ.\n");
3361 /* If all arguments are external or constant defs, infer the vector type
3362 from the scalar type. */
3364 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3366 gcc_assert (vectype_in
);
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3371 "no vectype for scalar type %T\n", rhs_type
);
3376 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3377 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3381 "mixed mask and nonmask vector types\n");
3385 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3389 "use emulated vector type for call\n");
3394 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3395 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3396 if (known_eq (nunits_in
* 2, nunits_out
))
3398 else if (known_eq (nunits_out
, nunits_in
))
3400 else if (known_eq (nunits_out
* 2, nunits_in
))
3405 /* We only handle functions that do not read or clobber memory. */
3406 if (gimple_vuse (stmt
))
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3410 "function reads from or writes to memory.\n");
3414 /* For now, we only vectorize functions if a target specific builtin
3415 is available. TODO -- in some cases, it might be profitable to
3416 insert the calls for pieces of the vector, in order to be able
3417 to vectorize other operations in the loop. */
3419 internal_fn ifn
= IFN_LAST
;
3420 tree callee
= gimple_call_fndecl (stmt
);
3422 /* First try using an internal function. */
3423 code_helper convert_code
= MAX_TREE_CODES
;
3425 && (modifier
== NONE
3426 || (modifier
== NARROW
3427 && simple_integer_narrowing (vectype_out
, vectype_in
,
3429 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3432 /* If that fails, try asking for a target-specific built-in function. */
3433 if (ifn
== IFN_LAST
)
3435 if (cfn
!= CFN_LAST
)
3436 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3437 (cfn
, vectype_out
, vectype_in
);
3438 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3439 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3440 (callee
, vectype_out
, vectype_in
);
3443 if (ifn
== IFN_LAST
&& !fndecl
)
3445 if (cfn
== CFN_GOMP_SIMD_LANE
3448 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3449 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3450 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3451 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3453 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3454 { 0, 1, 2, ... vf - 1 } vector. */
3455 gcc_assert (nargs
== 0);
3457 else if (modifier
== NONE
3458 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3459 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3460 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3461 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3462 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3463 slp_op
, vectype_in
, cost_vec
);
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3468 "function is not vectorizable.\n");
3475 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3476 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3478 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3480 /* Sanity check: make sure that at least one copy of the vectorized stmt
3481 needs to be generated. */
3482 gcc_assert (ncopies
>= 1);
3484 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3485 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3486 internal_fn cond_len_fn
= get_len_internal_fn (ifn
);
3487 int len_opno
= internal_fn_len_index (cond_len_fn
);
3488 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3489 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
3490 if (!vec_stmt
) /* transformation not required. */
3493 for (i
= 0; i
< nargs
; ++i
)
3494 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3496 ? vectypes
[i
] : vectype_in
))
3498 if (dump_enabled_p ())
3499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3500 "incompatible vector types for invariants\n");
3503 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3504 DUMP_VECT_SCOPE ("vectorizable_call");
3505 vect_model_simple_cost (vinfo
, stmt_info
,
3506 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3507 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3508 record_stmt_cost (cost_vec
, ncopies
/ 2,
3509 vec_promote_demote
, stmt_info
, 0, vect_body
);
3512 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3513 && (reduc_idx
>= 0 || mask_opno
>= 0))
3516 && (cond_fn
== IFN_LAST
3517 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3518 OPTIMIZE_FOR_SPEED
))
3519 && (cond_len_fn
== IFN_LAST
3520 || !direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3521 OPTIMIZE_FOR_SPEED
)))
3523 if (dump_enabled_p ())
3524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3525 "can't use a fully-masked loop because no"
3526 " conditional operation is available.\n");
3527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3531 unsigned int nvectors
3533 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3535 tree scalar_mask
= NULL_TREE
;
3537 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3538 if (cond_len_fn
!= IFN_LAST
3539 && direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3540 OPTIMIZE_FOR_SPEED
))
3541 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype_out
,
3544 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
,
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3557 scalar_dest
= gimple_call_lhs (stmt
);
3558 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3560 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3561 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
3562 unsigned int vect_nargs
= nargs
;
3568 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3571 else if (reduc_idx
>= 0)
3574 else if (masked_loop_p
&& reduc_idx
>= 0)
3580 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3582 tree prev_res
= NULL_TREE
;
3583 vargs
.safe_grow (vect_nargs
, true);
3584 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3585 for (j
= 0; j
< ncopies
; ++j
)
3587 /* Build argument list for the vectorized call. */
3590 vec
<tree
> vec_oprnds0
;
3592 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3593 vec_oprnds0
= vec_defs
[0];
3595 /* Arguments are ready. Create the new vector stmt. */
3596 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3599 if (masked_loop_p
&& reduc_idx
>= 0)
3601 unsigned int vec_num
= vec_oprnds0
.length ();
3602 /* Always true for SLP. */
3603 gcc_assert (ncopies
== 1);
3604 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3605 gsi
, masks
, vec_num
,
3609 for (k
= 0; k
< nargs
; k
++)
3611 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3612 vargs
[varg
++] = vec_oprndsk
[i
];
3614 if (masked_loop_p
&& reduc_idx
>= 0)
3615 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3617 if (modifier
== NARROW
)
3619 /* We don't define any narrowing conditional functions
3621 gcc_assert (mask_opno
< 0);
3622 tree half_res
= make_ssa_name (vectype_in
);
3624 = gimple_build_call_internal_vec (ifn
, vargs
);
3625 gimple_call_set_lhs (call
, half_res
);
3626 gimple_call_set_nothrow (call
, true);
3627 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3630 prev_res
= half_res
;
3633 new_temp
= make_ssa_name (vec_dest
);
3634 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3635 prev_res
, half_res
);
3636 vect_finish_stmt_generation (vinfo
, stmt_info
,
3641 if (len_opno
>= 0 && len_loop_p
)
3643 unsigned int vec_num
= vec_oprnds0
.length ();
3644 /* Always true for SLP. */
3645 gcc_assert (ncopies
== 1);
3647 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3650 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3651 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3652 vargs
[len_opno
] = len
;
3653 vargs
[len_opno
+ 1] = bias
;
3655 else if (mask_opno
>= 0 && masked_loop_p
)
3657 unsigned int vec_num
= vec_oprnds0
.length ();
3658 /* Always true for SLP. */
3659 gcc_assert (ncopies
== 1);
3660 tree mask
= vect_get_loop_mask (loop_vinfo
,
3661 gsi
, masks
, vec_num
,
3663 vargs
[mask_opno
] = prepare_vec_mask
3664 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3665 vargs
[mask_opno
], gsi
);
3669 if (ifn
!= IFN_LAST
)
3670 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3672 call
= gimple_build_call_vec (fndecl
, vargs
);
3673 new_temp
= make_ssa_name (vec_dest
, call
);
3674 gimple_call_set_lhs (call
, new_temp
);
3675 gimple_call_set_nothrow (call
, true);
3676 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3679 slp_node
->push_vec_def (new_stmt
);
3685 if (masked_loop_p
&& reduc_idx
>= 0)
3686 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3688 for (i
= 0; i
< nargs
; i
++)
3690 op
= gimple_call_arg (stmt
, i
);
3693 vec_defs
.quick_push (vNULL
);
3694 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3698 vargs
[varg
++] = vec_defs
[i
][j
];
3700 if (masked_loop_p
&& reduc_idx
>= 0)
3701 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3703 if (len_opno
>= 0 && len_loop_p
)
3705 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3708 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3709 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3710 vargs
[len_opno
] = len
;
3711 vargs
[len_opno
+ 1] = bias
;
3713 else if (mask_opno
>= 0 && masked_loop_p
)
3715 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3718 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3719 vargs
[mask_opno
], gsi
);
3723 if (cfn
== CFN_GOMP_SIMD_LANE
)
3725 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3727 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3728 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3729 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3730 new_temp
= make_ssa_name (vec_dest
);
3731 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3732 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3734 else if (modifier
== NARROW
)
3736 /* We don't define any narrowing conditional functions at
3738 gcc_assert (mask_opno
< 0);
3739 tree half_res
= make_ssa_name (vectype_in
);
3740 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3741 gimple_call_set_lhs (call
, half_res
);
3742 gimple_call_set_nothrow (call
, true);
3743 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3746 prev_res
= half_res
;
3749 new_temp
= make_ssa_name (vec_dest
);
3750 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
3752 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3757 if (ifn
!= IFN_LAST
)
3758 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3760 call
= gimple_build_call_vec (fndecl
, vargs
);
3761 new_temp
= make_ssa_name (vec_dest
, call
);
3762 gimple_call_set_lhs (call
, new_temp
);
3763 gimple_call_set_nothrow (call
, true);
3764 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3768 if (j
== (modifier
== NARROW
? 1 : 0))
3769 *vec_stmt
= new_stmt
;
3770 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3772 for (i
= 0; i
< nargs
; i
++)
3774 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3775 vec_oprndsi
.release ();
3778 else if (modifier
== NARROW
)
3780 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3781 /* We don't define any narrowing conditional functions at present. */
3782 gcc_assert (mask_opno
< 0);
3783 for (j
= 0; j
< ncopies
; ++j
)
3785 /* Build argument list for the vectorized call. */
3787 vargs
.create (nargs
* 2);
3793 vec
<tree
> vec_oprnds0
;
3795 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3796 vec_oprnds0
= vec_defs
[0];
3798 /* Arguments are ready. Create the new vector stmt. */
3799 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3803 for (k
= 0; k
< nargs
; k
++)
3805 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3806 vargs
.quick_push (vec_oprndsk
[i
]);
3807 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3810 if (ifn
!= IFN_LAST
)
3811 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3813 call
= gimple_build_call_vec (fndecl
, vargs
);
3814 new_temp
= make_ssa_name (vec_dest
, call
);
3815 gimple_call_set_lhs (call
, new_temp
);
3816 gimple_call_set_nothrow (call
, true);
3817 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3818 slp_node
->push_vec_def (call
);
3823 for (i
= 0; i
< nargs
; i
++)
3825 op
= gimple_call_arg (stmt
, i
);
3828 vec_defs
.quick_push (vNULL
);
3829 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3830 op
, &vec_defs
[i
], vectypes
[i
]);
3832 vec_oprnd0
= vec_defs
[i
][2*j
];
3833 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3835 vargs
.quick_push (vec_oprnd0
);
3836 vargs
.quick_push (vec_oprnd1
);
3839 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3840 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3841 gimple_call_set_lhs (new_stmt
, new_temp
);
3842 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3844 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3848 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3850 for (i
= 0; i
< nargs
; i
++)
3852 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3853 vec_oprndsi
.release ();
3857 /* No current target implements this case. */
3862 /* The call in STMT might prevent it from being removed in dce.
3863 We however cannot remove it here, due to the way the ssa name
3864 it defines is mapped to the new definition. So just replace
3865 rhs of the statement with something harmless. */
3870 stmt_info
= vect_orig_stmt (stmt_info
);
3871 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3874 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3875 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3881 struct simd_call_arg_info
3885 HOST_WIDE_INT linear_step
;
3886 enum vect_def_type dt
;
3888 bool simd_lane_linear
;
3891 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3892 is linear within simd lane (but not within whole loop), note it in
3896 vect_simd_lane_linear (tree op
, class loop
*loop
,
3897 struct simd_call_arg_info
*arginfo
)
3899 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3901 if (!is_gimple_assign (def_stmt
)
3902 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3903 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3906 tree base
= gimple_assign_rhs1 (def_stmt
);
3907 HOST_WIDE_INT linear_step
= 0;
3908 tree v
= gimple_assign_rhs2 (def_stmt
);
3909 while (TREE_CODE (v
) == SSA_NAME
)
3912 def_stmt
= SSA_NAME_DEF_STMT (v
);
3913 if (is_gimple_assign (def_stmt
))
3914 switch (gimple_assign_rhs_code (def_stmt
))
3917 t
= gimple_assign_rhs2 (def_stmt
);
3918 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3920 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3921 v
= gimple_assign_rhs1 (def_stmt
);
3924 t
= gimple_assign_rhs2 (def_stmt
);
3925 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3927 linear_step
= tree_to_shwi (t
);
3928 v
= gimple_assign_rhs1 (def_stmt
);
3931 t
= gimple_assign_rhs1 (def_stmt
);
3932 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3933 || (TYPE_PRECISION (TREE_TYPE (v
))
3934 < TYPE_PRECISION (TREE_TYPE (t
))))
3943 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3945 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3946 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3951 arginfo
->linear_step
= linear_step
;
3953 arginfo
->simd_lane_linear
= true;
3959 /* Function vectorizable_simd_clone_call.
3961 Check if STMT_INFO performs a function call that can be vectorized
3962 by calling a simd clone of the function.
3963 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3964 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3965 Return true if STMT_INFO is vectorizable in this way. */
3968 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3969 gimple_stmt_iterator
*gsi
,
3970 gimple
**vec_stmt
, slp_tree slp_node
,
3971 stmt_vector_for_cost
*)
3976 tree vec_oprnd0
= NULL_TREE
;
3979 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3980 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3981 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3982 tree fndecl
, new_temp
;
3984 auto_vec
<simd_call_arg_info
> arginfo
;
3985 vec
<tree
> vargs
= vNULL
;
3987 tree lhs
, rtype
, ratype
;
3988 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3989 int masked_call_offset
= 0;
3991 /* Is STMT a vectorizable call? */
3992 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3996 fndecl
= gimple_call_fndecl (stmt
);
3997 if (fndecl
== NULL_TREE
3998 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
4000 fndecl
= gimple_call_arg (stmt
, 0);
4001 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
4002 fndecl
= TREE_OPERAND (fndecl
, 0);
4003 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
4004 masked_call_offset
= 1;
4006 if (fndecl
== NULL_TREE
)
4009 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4010 if (node
== NULL
|| node
->simd_clones
== NULL
)
4013 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4016 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4020 if (gimple_call_lhs (stmt
)
4021 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4024 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4026 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4028 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4031 /* Process function arguments. */
4032 nargs
= gimple_call_num_args (stmt
) - masked_call_offset
;
4034 /* Bail out if the function has zero arguments. */
4038 vec
<tree
>& simd_clone_info
= (slp_node
? SLP_TREE_SIMD_CLONE_INFO (slp_node
)
4039 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info
));
4040 arginfo
.reserve (nargs
, true);
4041 auto_vec
<slp_tree
> slp_op
;
4042 slp_op
.safe_grow_cleared (nargs
);
4044 for (i
= 0; i
< nargs
; i
++)
4046 simd_call_arg_info thisarginfo
;
4049 thisarginfo
.linear_step
= 0;
4050 thisarginfo
.align
= 0;
4051 thisarginfo
.op
= NULL_TREE
;
4052 thisarginfo
.simd_lane_linear
= false;
4054 int op_no
= i
+ masked_call_offset
;
4056 op_no
= vect_slp_child_index_for_operand (stmt
, op_no
, false);
4057 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4058 op_no
, &op
, &slp_op
[i
],
4059 &thisarginfo
.dt
, &thisarginfo
.vectype
)
4060 || thisarginfo
.dt
== vect_uninitialized_def
)
4062 if (dump_enabled_p ())
4063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4064 "use not simple.\n");
4068 if (thisarginfo
.dt
== vect_constant_def
4069 || thisarginfo
.dt
== vect_external_def
)
4071 /* With SLP we determine the vector type of constants/externals
4072 at analysis time, handling conflicts via
4073 vect_maybe_update_slp_op_vectype. At transform time
4074 we have a vector type recorded for SLP. */
4075 gcc_assert (!vec_stmt
4077 || thisarginfo
.vectype
!= NULL_TREE
);
4079 thisarginfo
.vectype
= get_vectype_for_scalar_type (vinfo
,
4084 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4086 /* For linear arguments, the analyze phase should have saved
4087 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4088 if (i
* 3 + 4 <= simd_clone_info
.length ()
4089 && simd_clone_info
[i
* 3 + 2])
4091 gcc_assert (vec_stmt
);
4092 thisarginfo
.linear_step
= tree_to_shwi (simd_clone_info
[i
* 3 + 2]);
4093 thisarginfo
.op
= simd_clone_info
[i
* 3 + 1];
4094 thisarginfo
.simd_lane_linear
4095 = (simd_clone_info
[i
* 3 + 3] == boolean_true_node
);
4096 /* If loop has been peeled for alignment, we need to adjust it. */
4097 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4098 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4099 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4101 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4102 tree step
= simd_clone_info
[i
* 3 + 2];
4103 tree opt
= TREE_TYPE (thisarginfo
.op
);
4104 bias
= fold_convert (TREE_TYPE (step
), bias
);
4105 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4107 = fold_build2 (POINTER_TYPE_P (opt
)
4108 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4109 thisarginfo
.op
, bias
);
4113 && thisarginfo
.dt
!= vect_constant_def
4114 && thisarginfo
.dt
!= vect_external_def
4116 && TREE_CODE (op
) == SSA_NAME
4117 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4119 && tree_fits_shwi_p (iv
.step
))
4121 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4122 thisarginfo
.op
= iv
.base
;
4124 else if ((thisarginfo
.dt
== vect_constant_def
4125 || thisarginfo
.dt
== vect_external_def
)
4126 && POINTER_TYPE_P (TREE_TYPE (op
)))
4127 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4128 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4130 if (POINTER_TYPE_P (TREE_TYPE (op
))
4131 && !thisarginfo
.linear_step
4133 && thisarginfo
.dt
!= vect_constant_def
4134 && thisarginfo
.dt
!= vect_external_def
4136 && TREE_CODE (op
) == SSA_NAME
)
4137 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4139 arginfo
.quick_push (thisarginfo
);
4142 poly_uint64 vf
= loop_vinfo
? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) : 1;
4143 unsigned group_size
= slp_node
? SLP_TREE_LANES (slp_node
) : 1;
4144 unsigned int badness
= 0;
4145 struct cgraph_node
*bestn
= NULL
;
4146 if (simd_clone_info
.exists ())
4147 bestn
= cgraph_node::get (simd_clone_info
[0]);
4149 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4150 n
= n
->simdclone
->next_clone
)
4152 unsigned int this_badness
= 0;
4153 unsigned int num_calls
;
4154 /* The number of arguments in the call and the number of parameters in
4155 the simdclone should match. However, when the simdclone is
4156 'inbranch', it could have one more paramater than nargs when using
4157 an inbranch simdclone to call a non-inbranch call, either in a
4158 non-masked loop using a all true constant mask, or inside a masked
4159 loop using it's mask. */
4160 size_t simd_nargs
= n
->simdclone
->nargs
;
4161 if (!masked_call_offset
&& n
->simdclone
->inbranch
)
4163 if (!constant_multiple_p (vf
* group_size
, n
->simdclone
->simdlen
,
4165 || (!n
->simdclone
->inbranch
&& (masked_call_offset
> 0))
4166 || (nargs
!= simd_nargs
))
4169 this_badness
+= exact_log2 (num_calls
) * 4096;
4170 if (n
->simdclone
->inbranch
)
4171 this_badness
+= 8192;
4172 int target_badness
= targetm
.simd_clone
.usable (n
);
4173 if (target_badness
< 0)
4175 this_badness
+= target_badness
* 512;
4176 for (i
= 0; i
< nargs
; i
++)
4178 switch (n
->simdclone
->args
[i
].arg_type
)
4180 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4181 if (!useless_type_conversion_p
4182 (n
->simdclone
->args
[i
].orig_type
,
4183 TREE_TYPE (gimple_call_arg (stmt
,
4184 i
+ masked_call_offset
))))
4186 else if (arginfo
[i
].dt
== vect_constant_def
4187 || arginfo
[i
].dt
== vect_external_def
4188 || arginfo
[i
].linear_step
)
4191 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4192 if (arginfo
[i
].dt
!= vect_constant_def
4193 && arginfo
[i
].dt
!= vect_external_def
)
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4198 if (arginfo
[i
].dt
== vect_constant_def
4199 || arginfo
[i
].dt
== vect_external_def
4200 || (arginfo
[i
].linear_step
4201 != n
->simdclone
->args
[i
].linear_step
))
4204 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4205 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4206 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4208 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4209 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4213 case SIMD_CLONE_ARG_TYPE_MASK
:
4214 /* While we can create a traditional data vector from
4215 an incoming integer mode mask we have no good way to
4216 force generate an integer mode mask from a traditional
4217 boolean vector input. */
4218 if (SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4219 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4221 else if (!SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4222 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4223 this_badness
+= 2048;
4226 if (i
== (size_t) -1)
4228 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4233 if (arginfo
[i
].align
)
4234 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4235 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4237 if (i
== (size_t) -1)
4239 if (masked_call_offset
== 0
4240 && n
->simdclone
->inbranch
4241 && n
->simdclone
->nargs
> nargs
)
4243 gcc_assert (n
->simdclone
->args
[n
->simdclone
->nargs
- 1].arg_type
==
4244 SIMD_CLONE_ARG_TYPE_MASK
);
4245 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4246 not in a branch, as we'd have to construct an all-true mask. */
4247 if (!loop_vinfo
|| !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4250 if (bestn
== NULL
|| this_badness
< badness
)
4253 badness
= this_badness
;
4260 unsigned int num_mask_args
= 0;
4261 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4262 for (i
= 0; i
< nargs
; i
++)
4263 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4266 for (i
= 0; i
< nargs
; i
++)
4268 if ((arginfo
[i
].dt
== vect_constant_def
4269 || arginfo
[i
].dt
== vect_external_def
)
4270 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4272 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
,
4273 i
+ masked_call_offset
));
4274 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4276 if (arginfo
[i
].vectype
== NULL
4277 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4278 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4282 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4283 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4285 if (dump_enabled_p ())
4286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4287 "vector mask arguments are not supported.\n");
4291 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4293 tree clone_arg_vectype
= bestn
->simdclone
->args
[i
].vector_type
;
4294 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4296 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype
),
4297 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4299 /* FORNOW we only have partial support for vector-type masks
4300 that can't hold all of simdlen. */
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4304 "in-branch vector clones are not yet"
4305 " supported for mismatched vector sizes.\n");
4309 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4311 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
))
4312 || maybe_ne (exact_div (bestn
->simdclone
->simdlen
,
4314 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4316 /* FORNOW we only have partial support for integer-type masks
4317 that represent the same number of lanes as the
4318 vectorized mask inputs. */
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4322 "in-branch vector clones are not yet "
4323 "supported for mismatched vector sizes.\n");
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4332 "in-branch vector clones not supported"
4333 " on this target.\n");
4339 fndecl
= bestn
->decl
;
4340 nunits
= bestn
->simdclone
->simdlen
;
4342 ncopies
= vector_unroll_factor (vf
* group_size
, nunits
);
4344 ncopies
= vector_unroll_factor (vf
, nunits
);
4346 /* If the function isn't const, only allow it in simd loops where user
4347 has asserted that at least nunits consecutive iterations can be
4348 performed using SIMD instructions. */
4349 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4350 && gimple_vuse (stmt
))
4353 /* Sanity check: make sure that at least one copy of the vectorized stmt
4354 needs to be generated. */
4355 gcc_assert (ncopies
>= 1);
4357 if (!vec_stmt
) /* transformation not required. */
4360 for (unsigned i
= 0; i
< nargs
; ++i
)
4361 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], arginfo
[i
].vectype
))
4363 if (dump_enabled_p ())
4364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4365 "incompatible vector types for invariants\n");
4368 /* When the original call is pure or const but the SIMD ABI dictates
4369 an aggregate return we will have to use a virtual definition and
4370 in a loop eventually even need to add a virtual PHI. That's
4371 not straight-forward so allow to fix this up via renaming. */
4372 if (gimple_call_lhs (stmt
)
4373 && !gimple_vdef (stmt
)
4374 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4375 vinfo
->any_known_not_updated_vssa
= true;
4376 /* ??? For SLP code-gen we end up inserting after the last
4377 vector argument def rather than at the original call position
4378 so automagic virtual operand updating doesn't work. */
4379 if (gimple_vuse (stmt
) && slp_node
)
4380 vinfo
->any_known_not_updated_vssa
= true;
4381 simd_clone_info
.safe_push (bestn
->decl
);
4382 for (i
= 0; i
< bestn
->simdclone
->nargs
; i
++)
4384 switch (bestn
->simdclone
->args
[i
].arg_type
)
4388 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4389 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4391 simd_clone_info
.safe_grow_cleared (i
* 3 + 1, true);
4392 simd_clone_info
.safe_push (arginfo
[i
].op
);
4393 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4394 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4395 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4396 simd_clone_info
.safe_push (ls
);
4397 tree sll
= arginfo
[i
].simd_lane_linear
4398 ? boolean_true_node
: boolean_false_node
;
4399 simd_clone_info
.safe_push (sll
);
4402 case SIMD_CLONE_ARG_TYPE_MASK
:
4404 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4405 vect_record_loop_mask (loop_vinfo
,
4406 &LOOP_VINFO_MASKS (loop_vinfo
),
4407 ncopies
, vectype
, op
);
4413 if (!bestn
->simdclone
->inbranch
&& loop_vinfo
)
4415 if (dump_enabled_p ()
4416 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4417 dump_printf_loc (MSG_NOTE
, vect_location
,
4418 "can't use a fully-masked loop because a"
4419 " non-masked simd clone was selected.\n");
4420 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
4423 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4424 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4425 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4426 dt, slp_node, cost_vec); */
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4436 scalar_dest
= gimple_call_lhs (stmt
);
4437 vec_dest
= NULL_TREE
;
4442 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4443 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4444 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4447 rtype
= TREE_TYPE (ratype
);
4451 auto_vec
<vec
<tree
> > vec_oprnds
;
4452 auto_vec
<unsigned> vec_oprnds_i
;
4453 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4456 vec_oprnds
.reserve_exact (nargs
);
4457 vect_get_slp_defs (vinfo
, slp_node
, &vec_oprnds
);
4460 vec_oprnds
.safe_grow_cleared (nargs
, true);
4461 for (j
= 0; j
< ncopies
; ++j
)
4463 poly_uint64 callee_nelements
;
4464 poly_uint64 caller_nelements
;
4465 /* Build argument list for the vectorized call. */
4467 vargs
.create (nargs
);
4471 for (i
= 0; i
< nargs
; i
++)
4473 unsigned int k
, l
, m
, o
;
4475 op
= gimple_call_arg (stmt
, i
+ masked_call_offset
);
4476 switch (bestn
->simdclone
->args
[i
].arg_type
)
4478 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4479 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4480 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4481 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4482 o
= vector_unroll_factor (nunits
, callee_nelements
);
4483 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4485 if (known_lt (callee_nelements
, caller_nelements
))
4487 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4488 if (!constant_multiple_p (caller_nelements
,
4489 callee_nelements
, &k
))
4492 gcc_assert ((k
& (k
- 1)) == 0);
4496 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4497 ncopies
* o
/ k
, op
,
4499 vec_oprnds_i
[i
] = 0;
4500 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4504 vec_oprnd0
= arginfo
[i
].op
;
4505 if ((m
& (k
- 1)) == 0)
4506 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4508 arginfo
[i
].op
= vec_oprnd0
;
4510 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4512 bitsize_int ((m
& (k
- 1)) * prec
));
4514 = gimple_build_assign (make_ssa_name (atype
),
4516 vect_finish_stmt_generation (vinfo
, stmt_info
,
4518 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4522 if (!constant_multiple_p (callee_nelements
,
4523 caller_nelements
, &k
))
4525 gcc_assert ((k
& (k
- 1)) == 0);
4526 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4528 vec_alloc (ctor_elts
, k
);
4531 for (l
= 0; l
< k
; l
++)
4533 if (m
== 0 && l
== 0)
4536 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4540 vec_oprnds_i
[i
] = 0;
4541 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4544 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4545 arginfo
[i
].op
= vec_oprnd0
;
4548 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4552 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4555 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, atype
,
4558 = gimple_build_assign (make_ssa_name (atype
),
4560 vect_finish_stmt_generation (vinfo
, stmt_info
,
4562 vargs
.safe_push (gimple_get_lhs (new_stmt
));
4565 vargs
.safe_push (vec_oprnd0
);
4568 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4570 = gimple_build_assign (make_ssa_name (atype
),
4572 vect_finish_stmt_generation (vinfo
, stmt_info
,
4574 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4579 case SIMD_CLONE_ARG_TYPE_MASK
:
4580 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4582 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4583 tree elt_type
= TREE_TYPE (atype
);
4584 tree one
= fold_convert (elt_type
, integer_one_node
);
4585 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4586 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4587 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4588 o
= vector_unroll_factor (nunits
, callee_nelements
);
4589 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4591 if (maybe_lt (callee_nelements
, caller_nelements
))
4593 /* The mask type has fewer elements than simdlen. */
4598 else if (known_eq (callee_nelements
, caller_nelements
))
4600 /* The SIMD clone function has the same number of
4601 elements as the current function. */
4605 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4609 vec_oprnds_i
[i
] = 0;
4611 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4613 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4615 vec_loop_masks
*loop_masks
4616 = &LOOP_VINFO_MASKS (loop_vinfo
);
4618 = vect_get_loop_mask (loop_vinfo
, gsi
,
4619 loop_masks
, ncopies
,
4622 = prepare_vec_mask (loop_vinfo
,
4623 TREE_TYPE (loop_mask
),
4624 loop_mask
, vec_oprnd0
,
4626 loop_vinfo
->vec_cond_masked_set
.add ({ vec_oprnd0
,
4631 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4632 build_vector_from_val (atype
, one
),
4633 build_vector_from_val (atype
, zero
));
4635 = gimple_build_assign (make_ssa_name (atype
),
4637 vect_finish_stmt_generation (vinfo
, stmt_info
,
4639 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4643 /* The mask type has more elements than simdlen. */
4650 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4652 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4653 /* Guess the number of lanes represented by atype. */
4654 poly_uint64 atype_subparts
4655 = exact_div (bestn
->simdclone
->simdlen
,
4657 o
= vector_unroll_factor (nunits
, atype_subparts
);
4658 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4663 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4667 vec_oprnds_i
[i
] = 0;
4669 if (maybe_lt (atype_subparts
,
4670 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4672 /* The mask argument has fewer elements than the
4677 else if (known_eq (atype_subparts
,
4678 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4680 /* The vector mask argument matches the input
4681 in the number of lanes, but not necessarily
4683 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4684 tree st
= lang_hooks
.types
.type_for_mode
4685 (TYPE_MODE (TREE_TYPE (vec_oprnd0
)), 1);
4686 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, st
,
4689 = gimple_build_assign (make_ssa_name (st
),
4691 vect_finish_stmt_generation (vinfo
, stmt_info
,
4693 if (!types_compatible_p (atype
, st
))
4696 = gimple_build_assign (make_ssa_name (atype
),
4700 vect_finish_stmt_generation (vinfo
, stmt_info
,
4703 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4707 /* The mask argument has more elements than the
4717 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4718 vargs
.safe_push (op
);
4720 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4721 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4726 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4727 &stmts
, true, NULL_TREE
);
4731 edge pe
= loop_preheader_edge (loop
);
4732 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4733 gcc_assert (!new_bb
);
4735 if (arginfo
[i
].simd_lane_linear
)
4737 vargs
.safe_push (arginfo
[i
].op
);
4740 tree phi_res
= copy_ssa_name (op
);
4741 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4742 add_phi_arg (new_phi
, arginfo
[i
].op
,
4743 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4745 = POINTER_TYPE_P (TREE_TYPE (op
))
4746 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4747 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4748 ? sizetype
: TREE_TYPE (op
);
4750 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4752 tree tcst
= wide_int_to_tree (type
, cst
);
4753 tree phi_arg
= copy_ssa_name (op
);
4755 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4756 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4757 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4758 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4760 arginfo
[i
].op
= phi_res
;
4761 vargs
.safe_push (phi_res
);
4766 = POINTER_TYPE_P (TREE_TYPE (op
))
4767 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4768 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4769 ? sizetype
: TREE_TYPE (op
);
4771 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4773 tree tcst
= wide_int_to_tree (type
, cst
);
4774 new_temp
= make_ssa_name (TREE_TYPE (op
));
4776 = gimple_build_assign (new_temp
, code
,
4777 arginfo
[i
].op
, tcst
);
4778 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4779 vargs
.safe_push (new_temp
);
4782 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4783 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4784 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4785 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4786 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4787 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4793 if (masked_call_offset
== 0
4794 && bestn
->simdclone
->inbranch
4795 && bestn
->simdclone
->nargs
> nargs
)
4798 size_t mask_i
= bestn
->simdclone
->nargs
- 1;
4800 gcc_assert (bestn
->simdclone
->args
[mask_i
].arg_type
==
4801 SIMD_CLONE_ARG_TYPE_MASK
);
4803 tree masktype
= bestn
->simdclone
->args
[mask_i
].vector_type
;
4804 callee_nelements
= TYPE_VECTOR_SUBPARTS (masktype
);
4805 o
= vector_unroll_factor (nunits
, callee_nelements
);
4806 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4808 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4810 vec_loop_masks
*loop_masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
4811 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
4812 ncopies
, vectype
, j
);
4815 mask
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
4818 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4820 /* This means we are dealing with integer mask modes.
4821 First convert to an integer type with the same size as
4822 the current vector type. */
4823 unsigned HOST_WIDE_INT intermediate_size
4824 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask
)));
4826 build_nonstandard_integer_type (intermediate_size
, 1);
4827 mask
= build1 (VIEW_CONVERT_EXPR
, mid_int_type
, mask
);
4829 = gimple_build_assign (make_ssa_name (mid_int_type
),
4831 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4832 /* Then zero-extend to the mask mode. */
4833 mask
= fold_build1 (NOP_EXPR
, masktype
,
4834 gimple_get_lhs (new_stmt
));
4836 else if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4838 tree one
= fold_convert (TREE_TYPE (masktype
),
4840 tree zero
= fold_convert (TREE_TYPE (masktype
),
4842 mask
= build3 (VEC_COND_EXPR
, masktype
, mask
,
4843 build_vector_from_val (masktype
, one
),
4844 build_vector_from_val (masktype
, zero
));
4849 new_stmt
= gimple_build_assign (make_ssa_name (masktype
), mask
);
4850 vect_finish_stmt_generation (vinfo
, stmt_info
,
4852 mask
= gimple_assign_lhs (new_stmt
);
4853 vargs
.safe_push (mask
);
4857 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4861 || known_eq (TYPE_VECTOR_SUBPARTS (rtype
), nunits
));
4863 new_temp
= create_tmp_var (ratype
);
4864 else if (useless_type_conversion_p (vectype
, rtype
))
4865 new_temp
= make_ssa_name (vec_dest
, new_call
);
4867 new_temp
= make_ssa_name (rtype
, new_call
);
4868 gimple_call_set_lhs (new_call
, new_temp
);
4870 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4871 gimple
*new_stmt
= new_call
;
4875 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
4878 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4879 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4880 k
= vector_unroll_factor (nunits
,
4881 TYPE_VECTOR_SUBPARTS (vectype
));
4882 gcc_assert ((k
& (k
- 1)) == 0);
4883 for (l
= 0; l
< k
; l
++)
4888 t
= build_fold_addr_expr (new_temp
);
4889 t
= build2 (MEM_REF
, vectype
, t
,
4890 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4893 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4894 bitsize_int (prec
), bitsize_int (l
* prec
));
4895 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4896 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4898 if (j
== 0 && l
== 0)
4899 *vec_stmt
= new_stmt
;
4901 SLP_TREE_VEC_DEFS (slp_node
)
4902 .quick_push (gimple_assign_lhs (new_stmt
));
4904 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4908 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4911 else if (!multiple_p (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
4914 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype
),
4915 TYPE_VECTOR_SUBPARTS (rtype
), &k
))
4917 gcc_assert ((k
& (k
- 1)) == 0);
4918 if ((j
& (k
- 1)) == 0)
4919 vec_alloc (ret_ctor_elts
, k
);
4923 o
= vector_unroll_factor (nunits
,
4924 TYPE_VECTOR_SUBPARTS (rtype
));
4925 for (m
= 0; m
< o
; m
++)
4927 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4928 size_int (m
), NULL_TREE
, NULL_TREE
);
4929 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4931 vect_finish_stmt_generation (vinfo
, stmt_info
,
4933 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4934 gimple_assign_lhs (new_stmt
));
4936 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4939 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4940 if ((j
& (k
- 1)) != k
- 1)
4942 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4944 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4945 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4947 if ((unsigned) j
== k
- 1)
4948 *vec_stmt
= new_stmt
;
4950 SLP_TREE_VEC_DEFS (slp_node
)
4951 .quick_push (gimple_assign_lhs (new_stmt
));
4953 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4958 tree t
= build_fold_addr_expr (new_temp
);
4959 t
= build2 (MEM_REF
, vectype
, t
,
4960 build_int_cst (TREE_TYPE (t
), 0));
4961 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4962 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4963 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4965 else if (!useless_type_conversion_p (vectype
, rtype
))
4967 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4969 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4970 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4975 *vec_stmt
= new_stmt
;
4977 SLP_TREE_VEC_DEFS (slp_node
).quick_push (gimple_get_lhs (new_stmt
));
4979 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4982 for (i
= 0; i
< nargs
; ++i
)
4984 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4989 /* Mark the clone as no longer being a candidate for GC. */
4990 bestn
->gc_candidate
= false;
4992 /* The call in STMT might prevent it from being removed in dce.
4993 We however cannot remove it here, due to the way the ssa name
4994 it defines is mapped to the new definition. So just replace
4995 rhs of the statement with something harmless. */
5003 type
= TREE_TYPE (scalar_dest
);
5004 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
5005 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
5008 new_stmt
= gimple_build_nop ();
5009 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
5010 unlink_stmt_vdef (stmt
);
// NOTE(review): this region is a garbled extract of a GCC vectorizer source
// file — original line numbers are fused into the text and several lines
// (the return type, braces, and the final `return new_stmt;`) are missing.
// Comments below describe only what the visible fragments establish.
5016 /* Function vect_gen_widened_results_half
5018    Create a vector stmt whose code, type, number of arguments, and result
5019    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
5020    VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5021    In the case that CODE is a CALL_EXPR, this means that a call to DECL
5022    needs to be created (DECL is a function-decl of a target-builtin).
5023    STMT_INFO is the original scalar stmt that we are vectorizing. */
5026 vect_gen_widened_results_half (vec_info
*vinfo
, code_helper ch
,
5027				   tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
5028				   tree vec_dest
, gimple_stmt_iterator
*gsi
,
5029				   stmt_vec_info stmt_info
)
// For non-binary ops the second operand is unused; the then-branch body
// (original line 5036) is missing from this extract — presumably it clears
// VEC_OPRND1 before the build.  TODO(review): confirm against upstream.
5034   /* Generate half of the widened result: */
5035   if (op_type
!= binary_op
)
// Build VEC_DEST = CH (VEC_OPRND0, VEC_OPRND1) as a new GIMPLE statement,
// give it a fresh SSA name as its LHS, and emit it at GSI.
5037   new_stmt
= vect_gimple_build (vec_dest
, ch
, vec_oprnd0
, vec_oprnd1
);
5038   new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5039   gimple_set_lhs (new_stmt
, new_temp
);
5040   vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
// The function's return (of the freshly built statement, judging by the
// callers that take its lhs) is on lines dropped from this extract.
// NOTE(review): garbled extract — original line numbers are fused into the
// text and structural lines (braces, `else`, parts of the recursive call's
// argument list) are missing.  Comments describe only the visible fragments.
5046 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5047    For multi-step conversions store the resulting vectors and call the function
5048    recursively. When NARROW_SRC_P is true, there's still a conversion after
5049    narrowing, don't store the vectors in the SLP_NODE or in vector info of
5050    the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
5053 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
5055					stmt_vec_info stmt_info
,
5056					vec
<tree
> &vec_dsts
,
5057					gimple_stmt_iterator
*gsi
,
5058					slp_tree slp_node
, code_helper code
,
5062   tree vop0
, vop1
, new_tmp
, vec_dest
;
// The destination type for this demotion level is the top of VEC_DSTS;
// it is pushed back at the end (line 5107) so outer levels can reuse it.
5064   vec_dest
= vec_dsts
.pop ();
// Combine operands pairwise: each demotion statement consumes two input
// vectors and produces one narrower result.
5066   for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
5068       /* Create demotion operation. */
5069       vop0
= (*vec_oprnds
)[i
];
5070       vop1
= (*vec_oprnds
)[i
+ 1];
5071       gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
5072       new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
5073       gimple_set_lhs (new_stmt
, new_tmp
);
5074       vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
// Intermediate level: overwrite the input slot i/2 in place so the shrunken
// prefix of VEC_OPRNDS feeds the next recursion level.
5075       if (multi_step_cvt
|| narrow_src_p
)
5076	/* Store the resulting vector for next recursive call,
5077	   or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5078	(*vec_oprnds
)[i
/2] = new_tmp
;
5081	  /* This is the last step of the conversion sequence. Store the
5082	     vectors in SLP_NODE or in vector info of the scalar statement
5083	     (or in STMT_VINFO_RELATED_STMT chain). */
5085	    slp_node
->push_vec_def (new_stmt
);
5087	    STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5091   /* For multi-step demotion operations we first generate demotion operations
5092      from the source type to the intermediate types, and then combine the
5093      results (stored in VEC_OPRNDS) in demotion operation to the destination
5097       /* At each level of recursion we have half of the operands we had at the
// Each level halves the operand count; recurse with VEC_PACK_TRUNC_EXPR to
// combine the intermediate results.  Some arguments of this recursive call
// (presumably the decremented multi_step_cvt) are on lines dropped from
// this extract — TODO(review): confirm against upstream.
5099       vec_oprnds
->truncate ((i
+1)/2);
5100       vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
5102					     stmt_info
, vec_dsts
, gsi
,
5103					     slp_node
, VEC_PACK_TRUNC_EXPR
,
// Restore the destination type for the caller's level.
5107   vec_dsts
.quick_push (vec_dest
);
// NOTE(review): garbled extract — original line numbers are fused into the
// text and several lines (the `ch1` parameter, braces, `else`, and the two
// trailing arguments of each helper call) are missing from view.
5111 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5112    and VEC_OPRNDS1, for a binary operation associated with scalar statement
5113    STMT_INFO. For multi-step conversions store the resulting vectors and
5114    call the function recursively. */
5117 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
5118					 vec
<tree
> *vec_oprnds0
,
5119					 vec
<tree
> *vec_oprnds1
,
5120					 stmt_vec_info stmt_info
, tree vec_dest
,
5121					 gimple_stmt_iterator
*gsi
,
5123					 code_helper ch2
, int op_type
)
5126   tree vop0
, vop1
, new_tmp1
, new_tmp2
;
5127   gimple
*new_stmt1
, *new_stmt2
;
5128   vec
<tree
> vec_tmp
= vNULL
;
// Each input vector yields two widened halves, hence 2x capacity.
5130   vec_tmp
.create (vec_oprnds0
->length () * 2);
5131   FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
// Only binary operations consume a second operand vector.
5133       if (op_type
== binary_op
)
5134	vop1
= (*vec_oprnds1
)[i
];
// Build the low and high halves of the widened result with the two
// half-widening codes (ch1 comes from a parameter line dropped from this
// extract; ch2 is visible above).
5138       /* Generate the two halves of promotion operation. */
5139       new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5140						 op_type
, vec_dest
, gsi
,
5142       new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5143						 op_type
, vec_dest
, gsi
,
// The helper may have produced either a call or an assignment; fetch the
// LHS with the matching accessor.
5145       if (is_gimple_call (new_stmt1
))
5147	  new_tmp1
= gimple_call_lhs (new_stmt1
);
5148	  new_tmp2
= gimple_call_lhs (new_stmt2
);
5152	  new_tmp1
= gimple_assign_lhs (new_stmt1
);
5153	  new_tmp2
= gimple_assign_lhs (new_stmt2
);
5156       /* Store the results for the next step. */
5157       vec_tmp
.quick_push (new_tmp1
);
5158       vec_tmp
.quick_push (new_tmp2
);
// Hand the widened vectors back to the caller through VEC_OPRNDS0,
// releasing the old contents first.
5161   vec_oprnds0
->release ();
5162   *vec_oprnds0
= vec_tmp
;
// NOTE(review): garbled extract — original line numbers are fused into the
// text; several parameter lines (orig. 5173-5180, which presumably declare
// code1, op_type and locals such as new_stmt1/new_stmt2/new_stmt3) and
// structural lines (braces, `else`) are missing.  TODO(review): confirm the
// full signature against upstream.
5165 /* Create vectorized promotion stmts for widening stmts using only half the
5166    potential vector size for input. */
5168 vect_create_half_widening_stmts (vec_info
*vinfo
,
5169					vec
<tree
> *vec_oprnds0
,
5170					vec
<tree
> *vec_oprnds1
,
5171					stmt_vec_info stmt_info
, tree vec_dest
,
5172					gimple_stmt_iterator
*gsi
,
// One result per input vector (contrast with the full promotion path,
// which produces two halves per input).
5181   vec
<tree
> vec_tmp
= vNULL
;
5183   vec_tmp
.create (vec_oprnds0
->length ());
5184   FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5186       tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
// This path only supports binary operations.
5188       gcc_assert (op_type
== binary_op
);
5189       vop1
= (*vec_oprnds1
)[i
];
// Convert the first operand up to the destination's element/vector type
// before performing the operation.
5191       /* Widen the first vector input. */
5192       out_type
= TREE_TYPE (vec_dest
);
5193       new_tmp1
= make_ssa_name (out_type
);
5194       new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5195       vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
// If the second operand is also a vector it must be widened the same way;
// otherwise (e.g. a scalar shift amount) it is used as-is.
5196       if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5198	  /* Widen the second vector input. */
5199	  new_tmp2
= make_ssa_name (out_type
);
5200	  new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5201	  vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5202	  /* Perform the operation. With both vector inputs widened. */
5203	  new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5207	  /* Perform the operation. With the single vector input widened. */
5208	  new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
// Give the combined statement a fresh SSA LHS and emit it.
5211       new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5212       gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5213       vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5215       /* Store the results for the next step. */
5216       vec_tmp
.quick_push (new_tmp3
);
// Return the widened results through VEC_OPRNDS0, releasing the old vector.
5219   vec_oprnds0
->release ();
5220   *vec_oprnds0
= vec_tmp
;
5224 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5225 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5226 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5227 Return true if STMT_INFO is vectorizable in this way. */
5230 vectorizable_conversion (vec_info
*vinfo
,
5231 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5232 gimple
**vec_stmt
, slp_tree slp_node
,
5233 stmt_vector_for_cost
*cost_vec
)
5235 tree vec_dest
, cvt_op
= NULL_TREE
;
5237 tree op0
, op1
= NULL_TREE
;
5238 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5240 code_helper code
, code1
, code2
;
5241 code_helper codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
5243 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5245 poly_uint64 nunits_in
;
5246 poly_uint64 nunits_out
;
5247 tree vectype_out
, vectype_in
;
5249 tree lhs_type
, rhs_type
;
5250 /* For conversions between floating point and integer, there're 2 NARROW
5251 cases. NARROW_SRC is for FLOAT_EXPR, means
5252 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5253 This is safe when the range of the source integer can fit into the lower
5254 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5255 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5256 For other conversions, when there's narrowing, NARROW_DST is used as
5258 enum { NARROW_SRC
, NARROW_DST
, NONE
, WIDEN
} modifier
;
5259 vec
<tree
> vec_oprnds0
= vNULL
;
5260 vec
<tree
> vec_oprnds1
= vNULL
;
5262 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5263 int multi_step_cvt
= 0;
5264 vec
<tree
> interm_types
= vNULL
;
5265 tree intermediate_type
, cvt_type
= NULL_TREE
;
5267 unsigned short fltsz
;
5269 /* Is STMT a vectorizable conversion? */
5271 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5274 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5278 gimple
* stmt
= stmt_info
->stmt
;
5279 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5282 if (gimple_get_lhs (stmt
) == NULL_TREE
5283 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5286 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5289 if (is_gimple_assign (stmt
))
5291 code
= gimple_assign_rhs_code (stmt
);
5292 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5294 else if (gimple_call_internal_p (stmt
))
5296 code
= gimple_call_internal_fn (stmt
);
5297 op_type
= gimple_call_num_args (stmt
);
5302 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5303 || code
== WIDEN_LSHIFT_EXPR
5304 || widening_fn_p (code
));
5307 && !CONVERT_EXPR_CODE_P (code
)
5308 && code
!= FIX_TRUNC_EXPR
5309 && code
!= FLOAT_EXPR
)
5312 /* Check types of lhs and rhs. */
5313 scalar_dest
= gimple_get_lhs (stmt
);
5314 lhs_type
= TREE_TYPE (scalar_dest
);
5315 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5317 /* Check the operands of the operation. */
5318 slp_tree slp_op0
, slp_op1
= NULL
;
5319 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5320 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5324 "use not simple.\n");
5328 rhs_type
= TREE_TYPE (op0
);
5329 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5330 && !((INTEGRAL_TYPE_P (lhs_type
)
5331 && INTEGRAL_TYPE_P (rhs_type
))
5332 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5333 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5336 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5337 && ((INTEGRAL_TYPE_P (lhs_type
)
5338 && !type_has_mode_precision_p (lhs_type
))
5339 || (INTEGRAL_TYPE_P (rhs_type
)
5340 && !type_has_mode_precision_p (rhs_type
))))
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5344 "type conversion to/from bit-precision unsupported."
5349 if (op_type
== binary_op
)
5351 gcc_assert (code
== WIDEN_MULT_EXPR
5352 || code
== WIDEN_LSHIFT_EXPR
5353 || widening_fn_p (code
));
5355 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5356 gimple_call_arg (stmt
, 0);
5358 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5359 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5363 "use not simple.\n");
5366 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5369 vectype_in
= vectype1_in
;
5372 /* If op0 is an external or constant def, infer the vector type
5373 from the scalar type. */
5375 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5377 gcc_assert (vectype_in
);
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5382 "no vectype for scalar type %T\n", rhs_type
);
5387 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5388 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5390 if (dump_enabled_p ())
5391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5392 "can't convert between boolean and non "
5393 "boolean vectors %T\n", rhs_type
);
5398 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5399 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5400 if (known_eq (nunits_out
, nunits_in
))
5405 else if (multiple_p (nunits_out
, nunits_in
))
5406 modifier
= NARROW_DST
;
5409 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5413 /* Multiple types in SLP are handled by creating the appropriate number of
5414 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5418 else if (modifier
== NARROW_DST
)
5419 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5421 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5423 /* Sanity check: make sure that at least one copy of the vectorized stmt
5424 needs to be generated. */
5425 gcc_assert (ncopies
>= 1);
5427 bool found_mode
= false;
5428 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5429 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5430 opt_scalar_mode rhs_mode_iter
;
5432 /* Supportable by target? */
5436 if (code
!= FIX_TRUNC_EXPR
5437 && code
!= FLOAT_EXPR
5438 && !CONVERT_EXPR_CODE_P (code
))
5440 gcc_assert (code
.is_tree_code ());
5441 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5448 /* For conversions between float and integer types try whether
5449 we can use intermediate signed integer types to support the
5451 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
5452 && (code
== FLOAT_EXPR
||
5453 (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
5455 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
5456 bool float_expr_p
= code
== FLOAT_EXPR
;
5457 unsigned short target_size
;
5458 scalar_mode intermediate_mode
;
5461 intermediate_mode
= lhs_mode
;
5462 target_size
= GET_MODE_SIZE (rhs_mode
);
5466 target_size
= GET_MODE_SIZE (lhs_mode
);
5467 if (!int_mode_for_size
5468 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
5471 code1
= float_expr_p
? code
: NOP_EXPR
;
5472 codecvt1
= float_expr_p
? NOP_EXPR
: code
;
5473 opt_scalar_mode mode_iter
;
5474 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
5476 intermediate_mode
= mode_iter
.require ();
5478 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
5481 scalar_mode cvt_mode
;
5482 if (!int_mode_for_size
5483 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
5486 cvt_type
= build_nonstandard_integer_type
5487 (GET_MODE_BITSIZE (cvt_mode
), 0);
5489 /* Check if the intermediate type can hold OP0's range.
5490 When converting from float to integer this is not necessary
5491 because values that do not fit the (smaller) target type are
5492 unspecified anyway. */
5493 if (demotion
&& float_expr_p
)
5495 wide_int op_min_value
, op_max_value
;
5496 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5499 if (cvt_type
== NULL_TREE
5500 || (wi::min_precision (op_max_value
, SIGNED
)
5501 > TYPE_PRECISION (cvt_type
))
5502 || (wi::min_precision (op_min_value
, SIGNED
)
5503 > TYPE_PRECISION (cvt_type
)))
5507 cvt_type
= get_vectype_for_scalar_type (vinfo
, cvt_type
, slp_node
);
5508 /* This should only happened for SLP as long as loop vectorizer
5509 only supports same-sized vector. */
5510 if (cvt_type
== NULL_TREE
5511 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nunits_in
)
5512 || !supportable_convert_operation ((tree_code
) code1
,
5515 || !supportable_convert_operation ((tree_code
) codecvt1
,
5527 interm_types
.safe_push (cvt_type
);
5528 cvt_type
= NULL_TREE
;
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5538 "conversion not supported by target.\n");
5542 if (known_eq (nunits_in
, nunits_out
))
5544 if (!(code
.is_tree_code ()
5545 && supportable_half_widening_operation ((tree_code
) code
,
5546 vectype_out
, vectype_in
,
5550 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5553 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5554 vectype_out
, vectype_in
, &code1
,
5555 &code2
, &multi_step_cvt
,
5558 /* Binary widening operation can only be supported directly by the
5560 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5564 if (code
!= FLOAT_EXPR
5565 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5568 fltsz
= GET_MODE_SIZE (lhs_mode
);
5569 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5571 rhs_mode
= rhs_mode_iter
.require ();
5572 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5576 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5577 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5578 if (cvt_type
== NULL_TREE
)
5581 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5584 gcc_assert (code
.is_tree_code ());
5585 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5590 else if (!supportable_widening_operation (vinfo
, code
,
5591 stmt_info
, vectype_out
,
5592 cvt_type
, &codecvt1
,
5593 &codecvt2
, &multi_step_cvt
,
5597 gcc_assert (multi_step_cvt
== 0);
5599 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5602 &code2
, &multi_step_cvt
,
5613 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5614 codecvt2
= ERROR_MARK
;
5618 interm_types
.safe_push (cvt_type
);
5619 cvt_type
= NULL_TREE
;
5624 gcc_assert (op_type
== unary_op
);
5625 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5626 &code1
, &multi_step_cvt
,
5630 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5633 if (code
== FIX_TRUNC_EXPR
)
5636 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5637 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5638 if (cvt_type
== NULL_TREE
)
5640 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5645 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5646 &code1
, &multi_step_cvt
,
5650 /* If op0 can be represented with low precision integer,
5651 truncate it to cvt_type and the do FLOAT_EXPR. */
5652 else if (code
== FLOAT_EXPR
)
5654 wide_int op_min_value
, op_max_value
;
5655 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5659 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5660 if (cvt_type
== NULL_TREE
5661 || (wi::min_precision (op_max_value
, SIGNED
)
5662 > TYPE_PRECISION (cvt_type
))
5663 || (wi::min_precision (op_min_value
, SIGNED
)
5664 > TYPE_PRECISION (cvt_type
)))
5667 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5668 if (cvt_type
== NULL_TREE
)
5670 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5671 &code1
, &multi_step_cvt
,
5674 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5678 modifier
= NARROW_SRC
;
5689 if (!vec_stmt
) /* transformation not required. */
5692 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5693 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5695 if (dump_enabled_p ())
5696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5697 "incompatible vector types for invariants\n");
5700 DUMP_VECT_SCOPE ("vectorizable_conversion");
5701 if (modifier
== NONE
)
5703 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5704 vect_model_simple_cost (vinfo
, stmt_info
,
5705 ncopies
* (1 + multi_step_cvt
),
5706 dt
, ndts
, slp_node
, cost_vec
);
5708 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5710 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5711 /* The final packing step produces one vector result per copy. */
5712 unsigned int nvectors
5713 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5714 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5715 multi_step_cvt
, cost_vec
,
5720 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5721 /* The initial unpacking step produces two vector results
5722 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5723 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5724 unsigned int nvectors
5726 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5728 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5729 multi_step_cvt
, cost_vec
,
5732 interm_types
.release ();
5737 if (dump_enabled_p ())
5738 dump_printf_loc (MSG_NOTE
, vect_location
,
5739 "transform conversion. ncopies = %d.\n", ncopies
);
5741 if (op_type
== binary_op
)
5743 if (CONSTANT_CLASS_P (op0
))
5744 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5745 else if (CONSTANT_CLASS_P (op1
))
5746 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5749 /* In case of multi-step conversion, we first generate conversion operations
5750 to the intermediate types, and then from that types to the final one.
5751 We create vector destinations for the intermediate type (TYPES) received
5752 from supportable_*_operation, and store them in the correct order
5753 for future use in vect_create_vectorized_*_stmts (). */
5754 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5755 bool widen_or_narrow_float_p
5756 = cvt_type
&& (modifier
== WIDEN
|| modifier
== NARROW_SRC
);
5757 vec_dest
= vect_create_destination_var (scalar_dest
,
5758 widen_or_narrow_float_p
5759 ? cvt_type
: vectype_out
);
5760 vec_dsts
.quick_push (vec_dest
);
5764 for (i
= interm_types
.length () - 1;
5765 interm_types
.iterate (i
, &intermediate_type
); i
--)
5767 vec_dest
= vect_create_destination_var (scalar_dest
,
5769 vec_dsts
.quick_push (vec_dest
);
5774 vec_dest
= vect_create_destination_var (scalar_dest
,
5775 widen_or_narrow_float_p
5776 ? vectype_out
: cvt_type
);
5781 if (modifier
== WIDEN
)
5783 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5786 ninputs
= vect_pow2 (multi_step_cvt
);
5794 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5796 /* vec_dest is intermediate type operand when multi_step_cvt. */
5800 vec_dest
= vec_dsts
[0];
5803 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5805 /* Arguments are ready, create the new vector stmt. */
5809 gcc_assert (multi_step_cvt
== 1);
5810 new_stmt
= vect_gimple_build (cvt_op
, codecvt1
, vop0
);
5811 new_temp
= make_ssa_name (cvt_op
, new_stmt
);
5812 gimple_assign_set_lhs (new_stmt
, new_temp
);
5813 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5816 new_stmt
= vect_gimple_build (vec_dest
, code1
, vop0
);
5817 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5818 gimple_set_lhs (new_stmt
, new_temp
);
5819 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5822 slp_node
->push_vec_def (new_stmt
);
5824 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5829 /* In case the vectorization factor (VF) is bigger than the number
5830 of elements that we can fit in a vectype (nunits), we have to
5831 generate more than one vector stmt - i.e - we need to "unroll"
5832 the vector stmt by a factor VF/nunits. */
5833 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5835 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5837 if (code
== WIDEN_LSHIFT_EXPR
)
5839 int oprnds_size
= vec_oprnds0
.length ();
5840 vec_oprnds1
.create (oprnds_size
);
5841 for (i
= 0; i
< oprnds_size
; ++i
)
5842 vec_oprnds1
.quick_push (op1
);
5844 /* Arguments are ready. Create the new vector stmts. */
5845 for (i
= multi_step_cvt
; i
>= 0; i
--)
5847 tree this_dest
= vec_dsts
[i
];
5848 code_helper c1
= code1
, c2
= code2
;
5849 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5854 if (known_eq (nunits_out
, nunits_in
))
5855 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
, &vec_oprnds1
,
5856 stmt_info
, this_dest
, gsi
, c1
,
5859 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5860 &vec_oprnds1
, stmt_info
,
5865 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5870 new_temp
= make_ssa_name (vec_dest
);
5871 new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5872 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5875 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5878 slp_node
->push_vec_def (new_stmt
);
5880 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5886 /* In case the vectorization factor (VF) is bigger than the number
5887 of elements that we can fit in a vectype (nunits), we have to
5888 generate more than one vector stmt - i.e - we need to "unroll"
5889 the vector stmt by a factor VF/nunits. */
5890 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5892 /* Arguments are ready. Create the new vector stmts. */
5893 if (cvt_type
&& modifier
== NARROW_DST
)
5894 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5896 new_temp
= make_ssa_name (vec_dest
);
5897 gimple
*new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5898 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5899 vec_oprnds0
[i
] = new_temp
;
5902 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5904 stmt_info
, vec_dsts
, gsi
,
5906 modifier
== NARROW_SRC
);
5907 /* After demoting op0 to cvt_type, convert it to dest. */
5908 if (cvt_type
&& code
== FLOAT_EXPR
)
5910 for (unsigned int i
= 0; i
!= vec_oprnds0
.length() / 2; i
++)
5912 /* Arguments are ready, create the new vector stmt. */
5913 gcc_assert (TREE_CODE_LENGTH ((tree_code
) codecvt1
) == unary_op
);
5915 = vect_gimple_build (vec_dest
, codecvt1
, vec_oprnds0
[i
]);
5916 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5917 gimple_set_lhs (new_stmt
, new_temp
);
5918 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5920 /* This is the last step of the conversion sequence. Store the
5921 vectors in SLP_NODE or in vector info of the scalar statement
5922 (or in STMT_VINFO_RELATED_STMT chain). */
5924 slp_node
->push_vec_def (new_stmt
);
5926 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5932 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5934 vec_oprnds0
.release ();
5935 vec_oprnds1
.release ();
5936 interm_types
.release ();
5941 /* Return true if we can assume from the scalar form of STMT_INFO that
5942 neither the scalar nor the vector forms will generate code. STMT_INFO
5943 is known not to involve a data reference. */
/* NOTE(review): this chunk is a lossy extract -- the embedded original line
   numbers jump (5943 -> 5946 -> 5948 ...), so the return-type line of the
   function and the early-out taken when the stmt is not a gassign are not
   visible here; confirm against the upstream file.  Code below is kept
   byte-identical; only comments were added.  */
5946 vect_nop_conversion_p (stmt_vec_info stmt_info
)
/* Only plain assignments can be no-op conversions; dyn_cast yields a null
   pointer for any stmt that is not a gassign.  */
5948 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
/* Pick the assignment apart: destination, rhs code, first rhs operand.  */
5952 tree lhs
= gimple_assign_lhs (stmt
);
5953 tree_code code
= gimple_assign_rhs_code (stmt
);
5954 tree rhs
= gimple_assign_rhs1 (stmt
);
/* A plain SSA-name copy or a VIEW_CONVERT_EXPR generates no code.  */
5956 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
/* A CONVERT/NOP expression is free exactly when the conversion between
   the two scalar types is itself a no-op.  */
5959 if (CONVERT_EXPR_CODE_P (code
))
5960 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5965 /* Function vectorizable_assignment.
5967 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5968 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5969 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5970 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): lossy extract -- gaps in the embedded original numbering
   (e.g. 5976 -> 5981, 5996 -> 6000, 6099 -> 6105) indicate dropped lines
   (declarations, braces, early returns).  Code is kept byte-identical
   below; only comments were added.  */
5973 vectorizable_assignment (vec_info
*vinfo
,
5974 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5975 gimple
**vec_stmt
, slp_tree slp_node
,
5976 stmt_vector_for_cost
*cost_vec
)
/* Local state: the loop-vinfo (null for basic-block vectorization), the
   def-type of the single source operand, and the vector operands built
   during transformation.  */
5981 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5983 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5987 vec
<tree
> vec_oprnds
= vNULL
;
5989 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5990 enum tree_code code
;
/* Bail out for stmts that are not relevant to loop vectorization, unless
   this is basic-block SLP (bb_vinfo non-null).  */
5993 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5996 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6000 /* Is vectorizable assignment? */
6001 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
/* The destination must be an SSA name and the stmt must not carry a data
   reference (loads/stores are handled elsewhere).  */
6005 scalar_dest
= gimple_assign_lhs (stmt
);
6006 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6009 if (STMT_VINFO_DATA_REF (stmt_info
))
/* Accept plain copies, PAREN_EXPR and CONVERT/NOP conversions only.  */
6012 code
= gimple_assign_rhs_code (stmt
);
6013 if (!(gimple_assign_single_p (stmt
)
6014 || code
== PAREN_EXPR
6015 || CONVERT_EXPR_CODE_P (code
)))
6018 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6019 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6021 /* Multiple types in SLP are handled by creating the appropriate number of
6022 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6027 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6029 gcc_assert (ncopies
>= 1);
/* Validate the single source operand and fetch its def-type and vector
   type.  */
6032 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
6033 &dt
[0], &vectype_in
))
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6037 "use not simple.\n");
/* No vectype from the use (external/constant def): infer it from the
   scalar type of the operand.  */
6041 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
)
6043 /* We can handle NOP_EXPR conversions that do not change the number
6044 of elements or the vector size. */
6045 if ((CONVERT_EXPR_CODE_P (code
)
6046 || code
== VIEW_CONVERT_EXPR
)
6048 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
6049 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
6050 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
/* Mixing boolean and non-boolean vectors is never a plain copy.  */
6053 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
6055 if (dump_enabled_p ())
6056 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6057 "can't convert between boolean and non "
6058 "boolean vectors %T\n", TREE_TYPE (op
));
6063 /* We do not handle bit-precision changes. */
6064 if ((CONVERT_EXPR_CODE_P (code
)
6065 || code
== VIEW_CONVERT_EXPR
)
6066 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
6067 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6068 || (INTEGRAL_TYPE_P (TREE_TYPE (op
))
6069 && !type_has_mode_precision_p (TREE_TYPE (op
))))
6070 /* But a conversion that does not change the bit-pattern is ok. */
6071 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
6072 && INTEGRAL_TYPE_P (TREE_TYPE (op
))
6073 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
6074 > TYPE_PRECISION (TREE_TYPE (op
)))
6075 && TYPE_UNSIGNED (TREE_TYPE (op
)))
6076 || (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
6077 == TYPE_PRECISION (TREE_TYPE (op
))))))
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6081 "type conversion to/from bit-precision "
/* Analysis phase (vec_stmt is null): finalize the SLP operand's vectype,
   record the stmt kind and its cost; no code is generated here.  */
6086 if (!vec_stmt
) /* transformation not required. */
6089 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6093 "incompatible vector types for invariants\n");
6096 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
6097 DUMP_VECT_SCOPE ("vectorizable_assignment");
/* A no-op conversion costs nothing, so only cost the non-trivial case.  */
6098 if (!vect_nop_conversion_p (stmt_info
))
6099 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
/* Transform phase: build the vector destination, fetch the vectorized
   defs of the operand, and emit one copy per vector def.  */
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
6109 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6112 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
6114 /* Arguments are ready. create the new vector stmt. */
6115 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
/* Conversions that survived the checks above are bit-pattern preserving,
   so they are emitted as VIEW_CONVERT_EXPRs.  */
6117 if (CONVERT_EXPR_CODE_P (code
)
6118 || code
== VIEW_CONVERT_EXPR
)
6119 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
6120 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
6121 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6122 gimple_assign_set_lhs (new_stmt
, new_temp
);
6123 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the result either on the SLP node or on the stmt-info.  */
6125 slp_node
->push_vec_def (new_stmt
);
6127 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6130 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6132 vec_oprnds
.release ();
6137 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6138 either as shift by a scalar or by a vector. */
/* NOTE(review): lossy extract -- the declarations of VECTYPE, OPTAB and
   ICODE and several early-return lines fall in the gaps between the
   embedded original line numbers (6141 -> 6144 -> 6149 ...).  Code is
   kept byte-identical below; only comments were added.  */
6141 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
6144 machine_mode vec_mode
;
/* Build the vector type the shift would operate on.  */
6149 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
/* First try the vector-shifted-by-scalar optab ...  */
6153 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6155 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* ... and fall back to the vector-shifted-by-vector optab.  */
6157 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6159 || (optab_handler (optab
, TYPE_MODE (vectype
))
6160 == CODE_FOR_nothing
))
/* Final sanity check that the chosen optab has an insn for this mode.  */
6164 vec_mode
= TYPE_MODE (vectype
);
6165 icode
= (int) optab_handler (optab
, vec_mode
);
6166 if (icode
== CODE_FOR_nothing
)
6173 /* Function vectorizable_shift.
6175 Check if STMT_INFO performs a shift operation that can be vectorized.
6176 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6178 Return true if STMT_INFO is vectorizable in this way. */
/* NOTE(review): lossy extract -- gaps in the embedded original numbering
   (e.g. 6184 -> 6188, 6218 -> 6222, 6427 -> 6430) indicate dropped
   lines (declarations, braces, early returns and some call arguments).
   Code is kept byte-identical below; only comments were added.  */
6181 vectorizable_shift (vec_info
*vinfo
,
6182 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6183 gimple
**vec_stmt
, slp_tree slp_node
,
6184 stmt_vector_for_cost
*cost_vec
)
/* Local state: the two scalar operands, a cached vector form of the
   shift amount, def-types of both operands, and the vectors of
   vectorized defs built during the transform phase.  */
6188 tree op0
, op1
= NULL
;
6189 tree vec_oprnd1
= NULL_TREE
;
6191 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6192 enum tree_code code
;
6193 machine_mode vec_mode
;
6197 machine_mode optab_op2_mode
;
6198 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
6200 poly_uint64 nunits_in
;
6201 poly_uint64 nunits_out
;
6206 vec
<tree
> vec_oprnds0
= vNULL
;
6207 vec
<tree
> vec_oprnds1
= vNULL
;
/* Assume a scalar shift amount until proven otherwise below.  */
6210 bool scalar_shift_arg
= true;
6211 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6212 bool incompatible_op1_vectype_p
= false;
/* Bail out for stmts not relevant to loop vectorization unless this is
   basic-block SLP, and for def-types this routine does not handle.  */
6214 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6217 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6218 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
6222 /* Is STMT a vectorizable binary/unary operation? */
6223 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6227 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only the four shift/rotate tree codes are handled here.  */
6230 code
= gimple_assign_rhs_code (stmt
);
6232 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6233 || code
== RROTATE_EXPR
))
6236 scalar_dest
= gimple_assign_lhs (stmt
);
6237 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
/* Shifts on types whose precision differs from the mode precision are
   not supported.  */
6238 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6242 "bit-precision shifts not supported.\n");
/* Validate operand 0 (the shifted value).  */
6247 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6248 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6250 if (dump_enabled_p ())
6251 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6252 "use not simple.\n");
6255 /* If op0 is an external or constant def, infer the vector type
6256 from the scalar type. */
6258 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
6260 gcc_assert (vectype
);
6263 if (dump_enabled_p ())
6264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6265 "no vectype for scalar type\n");
/* Input and output vector types must have the same number of lanes.  */
6269 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6270 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6271 if (maybe_ne (nunits_out
, nunits_in
))
/* Validate operand 1 (the shift amount).  */
6274 stmt_vec_info op1_def_stmt_info
;
6276 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
6277 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
6279 if (dump_enabled_p ())
6280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6281 "use not simple.\n");
6285 /* Multiple types in SLP are handled by creating the appropriate number of
6286 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6291 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6293 gcc_assert (ncopies
>= 1);
6295 /* Determine whether the shift amount is a vector, or scalar. If the
6296 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6298 if ((dt
[1] == vect_internal_def
6299 || dt
[1] == vect_induction_def
6300 || dt
[1] == vect_nested_cycle
)
6302 scalar_shift_arg
= false;
6303 else if (dt
[1] == vect_constant_def
6304 || dt
[1] == vect_external_def
6305 || dt
[1] == vect_internal_def
)
6307 /* In SLP, need to check whether the shift count is the same,
6308 in loops if it is a constant or invariant, it is always
6312 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
6313 stmt_vec_info slpstmt_info
;
6315 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
6317 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
6318 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
6319 scalar_shift_arg
= false;
6322 /* For internal SLP defs we have to make sure we see scalar stmts
6323 for all vector elements.
6324 ??? For different vectors we could resort to a different
6325 scalar shift operand but code-generation below simply always
6327 if (dt
[1] == vect_internal_def
6328 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
6330 scalar_shift_arg
= false;
6333 /* If the shift amount is computed by a pattern stmt we cannot
6334 use the scalar amount directly thus give up and use a vector
6336 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
6337 scalar_shift_arg
= false;
6341 if (dump_enabled_p ())
6342 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6343 "operand mode requires invariant argument.\n");
6347 /* Vector shifted by vector. */
6348 bool was_scalar_shift_arg
= scalar_shift_arg
;
6349 if (!scalar_shift_arg
)
6351 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6352 if (dump_enabled_p ())
6353 dump_printf_loc (MSG_NOTE
, vect_location
,
6354 "vector/vector shift/rotate found.\n");
/* Check the shift amount's vector type is usable; if not, this is only
   recoverable for an SLP constant operand with a single use.  */
6357 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
6359 incompatible_op1_vectype_p
6360 = (op1_vectype
== NULL_TREE
6361 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
6362 TYPE_VECTOR_SUBPARTS (vectype
))
6363 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
6364 if (incompatible_op1_vectype_p
6366 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
6367 || slp_op1
->refcnt
!= 1))
6369 if (dump_enabled_p ())
6370 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6371 "unusable type for last operand in"
6372 " vector/vector shift/rotate.\n");
6376 /* See if the machine has a vector shifted by scalar insn and if not
6377 then see if it has a vector shifted by vector insn. */
6380 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6382 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_NOTE
, vect_location
,
6386 "vector/scalar shift/rotate found.\n");
/* No vector/scalar insn: retry with the vector/vector optab and demote
   the scalar shift amount to a vector one.  */
6390 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6392 && (optab_handler (optab
, TYPE_MODE (vectype
))
6393 != CODE_FOR_nothing
))
6395 scalar_shift_arg
= false;
6397 if (dump_enabled_p ())
6398 dump_printf_loc (MSG_NOTE
, vect_location
,
6399 "vector/vector shift/rotate found.\n");
6402 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
6406 /* Unlike the other binary operators, shifts/rotates have
6407 the rhs being int, instead of the same type as the lhs,
6408 so make sure the scalar is the right type if we are
6409 dealing with vectors of long long/long/short/char. */
6410 incompatible_op1_vectype_p
6412 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
6414 if (incompatible_op1_vectype_p
6415 && dt
[1] == vect_internal_def
)
6417 if (dump_enabled_p ())
6418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6419 "unusable type for last operand in"
6420 " vector/vector shift/rotate.\n");
6427 /* Supportable by target? */
6430 if (dump_enabled_p ())
6431 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
/* Require an actual insn for the chosen optab in the vector mode.  */
6435 vec_mode
= TYPE_MODE (vectype
);
6436 icode
= (int) optab_handler (optab
, vec_mode
);
6437 if (icode
== CODE_FOR_nothing
)
6439 if (dump_enabled_p ())
6440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6441 "op not supported by target.\n");
6444 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6445 if (vect_emulated_vector_p (vectype
))
/* Analysis phase (vec_stmt is null): finalize SLP operand vectypes,
   record the stmt kind and cost it; no code is generated.  */
6448 if (!vec_stmt
) /* transformation not required. */
6451 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6452 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
6453 && (!incompatible_op1_vectype_p
6454 || dt
[1] == vect_constant_def
)
6455 && !vect_maybe_update_slp_op_vectype
6457 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6459 if (dump_enabled_p ())
6460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6461 "incompatible vector types for invariants\n");
6464 /* Now adjust the constant shift amount in place. */
6466 && incompatible_op1_vectype_p
6467 && dt
[1] == vect_constant_def
)
6469 for (unsigned i
= 0;
6470 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6472 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6473 = fold_convert (TREE_TYPE (vectype
),
6474 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6475 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6479 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6480 DUMP_VECT_SCOPE ("vectorizable_shift");
6481 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6482 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
/* Transform phase starts here.  */
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_NOTE
, vect_location
,
6490 "transform binary/unary operation.\n");
/* Non-SLP path: materialize an op1 of the element type (and a vector of
   it for non-constant defs) when its type was incompatible.  */
6492 if (incompatible_op1_vectype_p
&& !slp_node
)
6494 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6495 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6496 if (dt
[1] != vect_constant_def
)
6497 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6498 TREE_TYPE (vectype
), NULL
);
6502 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6504 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
6506 /* Vector shl and shr insn patterns can be defined with scalar
6507 operand 2 (shift operand). In this case, use constant or loop
6508 invariant op1 directly, without extending it to vector mode
6510 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6511 if (!VECTOR_MODE_P (optab_op2_mode
))
6513 if (dump_enabled_p ())
6514 dump_printf_loc (MSG_NOTE
, vect_location
,
6515 "operand 1 using scalar mode.\n");
6517 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6518 vec_oprnds1
.quick_push (vec_oprnd1
);
6519 /* Store vec_oprnd1 for every vector stmt to be created.
6520 We check during the analysis that all the shift arguments
6522 TODO: Allow different constants for different vector
6523 stmts generated for an SLP instance. */
6525 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6526 vec_oprnds1
.quick_push (vec_oprnd1
);
6529 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6531 if (was_scalar_shift_arg
)
6533 /* If the argument was the same in all lanes create
6534 the correctly typed vector shift amount directly. */
6535 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6536 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6537 !loop_vinfo
? gsi
: NULL
);
6538 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6539 !loop_vinfo
? gsi
: NULL
);
6540 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6541 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6542 vec_oprnds1
.quick_push (vec_oprnd1
);
6544 else if (dt
[1] == vect_constant_def
)
6545 /* The constant shift amount has been adjusted in place. */
6548 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6551 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6552 (a special case for certain kind of vector shifts); otherwise,
6553 operand 1 should be of a vector type (the usual case). */
6554 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6556 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6558 /* Arguments are ready. Create the new vector stmt. */
6559 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6561 /* For internal defs where we need to use a scalar shift arg
6562 extract the first lane. */
6563 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6565 vop1
= vec_oprnds1
[0];
6566 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6568 = gimple_build_assign (new_temp
,
6569 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6571 TYPE_SIZE (TREE_TYPE (new_temp
)),
6572 bitsize_zero_node
));
6573 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6577 vop1
= vec_oprnds1
[i
];
6578 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6579 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6580 gimple_assign_set_lhs (new_stmt
, new_temp
);
6581 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Record the result either on the SLP node or on the stmt-info.  */
6583 slp_node
->push_vec_def (new_stmt
);
6585 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6589 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6591 vec_oprnds0
.release ();
6592 vec_oprnds1
.release ();
6597 /* Function vectorizable_operation.
6599 Check if STMT_INFO performs a binary, unary or ternary operation that can
6601 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6602 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6603 Return true if STMT_INFO is vectorizable in this way. */
6606 vectorizable_operation (vec_info
*vinfo
,
6607 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6608 gimple
**vec_stmt
, slp_tree slp_node
,
6609 stmt_vector_for_cost
*cost_vec
)
6613 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6615 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6616 enum tree_code code
, orig_code
;
6617 machine_mode vec_mode
;
6621 bool target_support_p
;
6622 enum vect_def_type dt
[3]
6623 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6625 poly_uint64 nunits_in
;
6626 poly_uint64 nunits_out
;
6628 int ncopies
, vec_num
;
6630 vec
<tree
> vec_oprnds0
= vNULL
;
6631 vec
<tree
> vec_oprnds1
= vNULL
;
6632 vec
<tree
> vec_oprnds2
= vNULL
;
6633 tree vop0
, vop1
, vop2
;
6634 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6636 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6639 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6643 /* Is STMT a vectorizable binary/unary operation? */
6644 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6648 /* Loads and stores are handled in vectorizable_{load,store}. */
6649 if (STMT_VINFO_DATA_REF (stmt_info
))
6652 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6654 /* Shifts are handled in vectorizable_shift. */
6655 if (code
== LSHIFT_EXPR
6656 || code
== RSHIFT_EXPR
6657 || code
== LROTATE_EXPR
6658 || code
== RROTATE_EXPR
)
6661 /* Comparisons are handled in vectorizable_comparison. */
6662 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6665 /* Conditions are handled in vectorizable_condition. */
6666 if (code
== COND_EXPR
)
6669 /* For pointer addition and subtraction, we should use the normal
6670 plus and minus for the vector operation. */
6671 if (code
== POINTER_PLUS_EXPR
)
6673 if (code
== POINTER_DIFF_EXPR
)
6676 /* Support only unary or binary operations. */
6677 op_type
= TREE_CODE_LENGTH (code
);
6678 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6680 if (dump_enabled_p ())
6681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6682 "num. args = %d (not unary/binary/ternary op).\n",
6687 scalar_dest
= gimple_assign_lhs (stmt
);
6688 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6690 /* Most operations cannot handle bit-precision types without extra
6692 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6694 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6695 /* Exception are bitwise binary operations. */
6696 && code
!= BIT_IOR_EXPR
6697 && code
!= BIT_XOR_EXPR
6698 && code
!= BIT_AND_EXPR
)
6700 if (dump_enabled_p ())
6701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6702 "bit-precision arithmetic not supported.\n");
6707 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6708 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6710 if (dump_enabled_p ())
6711 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6712 "use not simple.\n");
6715 bool is_invariant
= (dt
[0] == vect_external_def
6716 || dt
[0] == vect_constant_def
);
6717 /* If op0 is an external or constant def, infer the vector type
6718 from the scalar type. */
6721 /* For boolean type we cannot determine vectype by
6722 invariant value (don't know whether it is a vector
6723 of booleans or vector of integers). We use output
6724 vectype because operations on boolean don't change
6726 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6728 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6732 "not supported operation on bool value.\n");
6735 vectype
= vectype_out
;
6738 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6742 gcc_assert (vectype
);
6745 if (dump_enabled_p ())
6746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6747 "no vectype for scalar type %T\n",
6753 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6754 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6755 if (maybe_ne (nunits_out
, nunits_in
))
6758 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6759 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6760 if (op_type
== binary_op
|| op_type
== ternary_op
)
6762 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6763 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6765 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6767 "use not simple.\n");
6770 is_invariant
&= (dt
[1] == vect_external_def
6771 || dt
[1] == vect_constant_def
);
6773 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6776 if (op_type
== ternary_op
)
6778 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6779 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6781 if (dump_enabled_p ())
6782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6783 "use not simple.\n");
6786 is_invariant
&= (dt
[2] == vect_external_def
6787 || dt
[2] == vect_constant_def
);
6789 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
6793 /* Multiple types in SLP are handled by creating the appropriate number of
6794 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6799 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6803 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6807 gcc_assert (ncopies
>= 1);
6809 /* Reject attempts to combine mask types with nonmask types, e.g. if
6810 we have an AND between a (nonmask) boolean loaded from memory and
6811 a (mask) boolean result of a comparison.
6813 TODO: We could easily fix these cases up using pattern statements. */
6814 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6815 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6816 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6818 if (dump_enabled_p ())
6819 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6820 "mixed mask and nonmask vector types\n");
6824 /* Supportable by target? */
6826 vec_mode
= TYPE_MODE (vectype
);
6827 if (code
== MULT_HIGHPART_EXPR
)
6828 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6831 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6834 if (dump_enabled_p ())
6835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6839 target_support_p
= (optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
6840 || optab_libfunc (optab
, vec_mode
));
6843 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6844 if (!target_support_p
|| using_emulated_vectors_p
)
6846 if (dump_enabled_p ())
6847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6848 "op not supported by target.\n");
6849 /* When vec_mode is not a vector mode and we verified ops we
6850 do not have to lower like AND are natively supported let
6851 those through even when the mode isn't word_mode. For
6852 ops we have to lower the lowering code assumes we are
6853 dealing with word_mode. */
6854 if ((((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6855 || !target_support_p
)
6856 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6857 /* Check only during analysis. */
6858 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6860 if (dump_enabled_p ())
6861 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6864 if (dump_enabled_p ())
6865 dump_printf_loc (MSG_NOTE
, vect_location
,
6866 "proceeding using word mode.\n");
6867 using_emulated_vectors_p
= true;
6870 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6871 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6872 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
6873 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6874 internal_fn cond_len_fn
= get_conditional_len_internal_fn (code
);
6876 /* If operating on inactive elements could generate spurious traps,
6877 we need to restrict the operation to active lanes. Note that this
6878 specifically doesn't apply to unhoisted invariants, since they
6879 operate on the same value for every lane.
6881 Similarly, if this operation is part of a reduction, a fully-masked
6882 loop should only change the active lanes of the reduction chain,
6883 keeping the inactive lanes as-is. */
6884 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6887 if (!vec_stmt
) /* transformation not required. */
6890 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6891 && mask_out_inactive
)
6893 if (cond_len_fn
!= IFN_LAST
6894 && direct_internal_fn_supported_p (cond_len_fn
, vectype
,
6895 OPTIMIZE_FOR_SPEED
))
6896 vect_record_loop_len (loop_vinfo
, lens
, ncopies
* vec_num
, vectype
,
6898 else if (cond_fn
!= IFN_LAST
6899 && direct_internal_fn_supported_p (cond_fn
, vectype
,
6900 OPTIMIZE_FOR_SPEED
))
6901 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6905 if (dump_enabled_p ())
6906 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6907 "can't use a fully-masked loop because no"
6908 " conditional operation is available.\n");
6909 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6913 /* Put types on constant and invariant SLP children. */
6915 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6916 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6917 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6919 if (dump_enabled_p ())
6920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6921 "incompatible vector types for invariants\n");
6925 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6926 DUMP_VECT_SCOPE ("vectorizable_operation");
6927 vect_model_simple_cost (vinfo
, stmt_info
,
6928 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6929 if (using_emulated_vectors_p
)
6931 /* The above vect_model_simple_cost call handles constants
6932 in the prologue and (mis-)costs one of the stmts as
6933 vector stmt. See below for the actual lowering that will
6936 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6949 /* Bit operations do not have extra cost and are accounted
6950 as vector stmt by vect_model_simple_cost. */
6956 /* We also need to materialize two large constants. */
6957 record_stmt_cost (cost_vec
, 2, scalar_stmt
, stmt_info
,
6959 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
,
6968 if (dump_enabled_p ())
6969 dump_printf_loc (MSG_NOTE
, vect_location
,
6970 "transform binary/unary operation.\n");
6972 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6973 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
6975 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6976 vectors with unsigned elements, but the result is signed. So, we
6977 need to compute the MINUS_EXPR into vectype temporary and
6978 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6979 tree vec_cvt_dest
= NULL_TREE
;
6980 if (orig_code
== POINTER_DIFF_EXPR
)
6982 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6983 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6987 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6989 /* In case the vectorization factor (VF) is bigger than the number
6990 of elements that we can fit in a vectype (nunits), we have to generate
6991 more than one vector stmt - i.e - we need to "unroll" the
6992 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6993 from one copy of the vector stmt to the next, in the field
6994 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6995 stages to find the correct vector defs to be used when vectorizing
6996 stmts that use the defs of the current stmt. The example below
6997 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6998 we need to create 4 vectorized stmts):
7000 before vectorization:
7001 RELATED_STMT VEC_STMT
7005 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
7007 RELATED_STMT VEC_STMT
7008 VS1_0: vx0 = memref0 VS1_1 -
7009 VS1_1: vx1 = memref1 VS1_2 -
7010 VS1_2: vx2 = memref2 VS1_3 -
7011 VS1_3: vx3 = memref3 - -
7012 S1: x = load - VS1_0
7015 step2: vectorize stmt S2 (done here):
7016 To vectorize stmt S2 we first need to find the relevant vector
7017 def for the first operand 'x'. This is, as usual, obtained from
7018 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7019 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7020 relevant vector def 'vx0'. Having found 'vx0' we can generate
7021 the vector stmt VS2_0, and as usual, record it in the
7022 STMT_VINFO_VEC_STMT of stmt S2.
7023 When creating the second copy (VS2_1), we obtain the relevant vector
7024 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7025 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7026 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7027 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7028 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7029 chain of stmts and pointers:
7030 RELATED_STMT VEC_STMT
7031 VS1_0: vx0 = memref0 VS1_1 -
7032 VS1_1: vx1 = memref1 VS1_2 -
7033 VS1_2: vx2 = memref2 VS1_3 -
7034 VS1_3: vx3 = memref3 - -
7035 S1: x = load - VS1_0
7036 VS2_0: vz0 = vx0 + v1 VS2_1 -
7037 VS2_1: vz1 = vx1 + v1 VS2_2 -
7038 VS2_2: vz2 = vx2 + v1 VS2_3 -
7039 VS2_3: vz3 = vx3 + v1 - -
7040 S2: z = x + 1 - VS2_0 */
7042 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
7043 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
7044 /* Arguments are ready. Create the new vector stmt. */
7045 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
7047 gimple
*new_stmt
= NULL
;
7048 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
7049 ? vec_oprnds1
[i
] : NULL_TREE
);
7050 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
7051 if (using_emulated_vectors_p
7052 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
7054 /* Lower the operation. This follows vector lowering. */
7055 unsigned int width
= vector_element_bits (vectype
);
7056 tree inner_type
= TREE_TYPE (vectype
);
7058 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
7059 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
7060 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
7062 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
7063 tree wvop0
= make_ssa_name (word_type
);
7064 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
7065 build1 (VIEW_CONVERT_EXPR
,
7067 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7068 tree result_low
, signs
;
7069 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
7071 tree wvop1
= make_ssa_name (word_type
);
7072 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
7073 build1 (VIEW_CONVERT_EXPR
,
7075 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7076 signs
= make_ssa_name (word_type
);
7077 new_stmt
= gimple_build_assign (signs
,
7078 BIT_XOR_EXPR
, wvop0
, wvop1
);
7079 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7080 tree b_low
= make_ssa_name (word_type
);
7081 new_stmt
= gimple_build_assign (b_low
,
7082 BIT_AND_EXPR
, wvop1
, low_bits
);
7083 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7084 tree a_low
= make_ssa_name (word_type
);
7085 if (code
== PLUS_EXPR
)
7086 new_stmt
= gimple_build_assign (a_low
,
7087 BIT_AND_EXPR
, wvop0
, low_bits
);
7089 new_stmt
= gimple_build_assign (a_low
,
7090 BIT_IOR_EXPR
, wvop0
, high_bits
);
7091 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7092 if (code
== MINUS_EXPR
)
7094 new_stmt
= gimple_build_assign (NULL_TREE
,
7095 BIT_NOT_EXPR
, signs
);
7096 signs
= make_ssa_name (word_type
);
7097 gimple_assign_set_lhs (new_stmt
, signs
);
7098 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7100 new_stmt
= gimple_build_assign (NULL_TREE
,
7101 BIT_AND_EXPR
, signs
, high_bits
);
7102 signs
= make_ssa_name (word_type
);
7103 gimple_assign_set_lhs (new_stmt
, signs
);
7104 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7105 result_low
= make_ssa_name (word_type
);
7106 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
7107 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7111 tree a_low
= make_ssa_name (word_type
);
7112 new_stmt
= gimple_build_assign (a_low
,
7113 BIT_AND_EXPR
, wvop0
, low_bits
);
7114 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7115 signs
= make_ssa_name (word_type
);
7116 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
7117 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7118 new_stmt
= gimple_build_assign (NULL_TREE
,
7119 BIT_AND_EXPR
, signs
, high_bits
);
7120 signs
= make_ssa_name (word_type
);
7121 gimple_assign_set_lhs (new_stmt
, signs
);
7122 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7123 result_low
= make_ssa_name (word_type
);
7124 new_stmt
= gimple_build_assign (result_low
,
7125 MINUS_EXPR
, high_bits
, a_low
);
7126 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7128 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
7130 result_low
= make_ssa_name (word_type
);
7131 gimple_assign_set_lhs (new_stmt
, result_low
);
7132 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7133 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
7134 build1 (VIEW_CONVERT_EXPR
,
7135 vectype
, result_low
));
7136 new_temp
= make_ssa_name (vectype
);
7137 gimple_assign_set_lhs (new_stmt
, new_temp
);
7138 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7140 else if ((masked_loop_p
|| len_loop_p
) && mask_out_inactive
)
7144 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7145 vec_num
* ncopies
, vectype
, i
);
7148 mask
= build_minus_one_cst (truth_type_for (vectype
));
7149 auto_vec
<tree
> vops (6);
7150 vops
.quick_push (mask
);
7151 vops
.quick_push (vop0
);
7153 vops
.quick_push (vop1
);
7155 vops
.quick_push (vop2
);
7158 /* Perform the operation on active elements only and take
7159 inactive elements from the reduction chain input. */
7161 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
7165 auto else_value
= targetm
.preferred_else_value
7166 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
7167 vops
.quick_push (else_value
);
7171 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
7172 vec_num
* ncopies
, vectype
, i
, 1);
7174 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
7175 tree bias
= build_int_cst (intQI_type_node
, biasval
);
7176 vops
.quick_push (len
);
7177 vops
.quick_push (bias
);
7180 = gimple_build_call_internal_vec (masked_loop_p
? cond_fn
7183 new_temp
= make_ssa_name (vec_dest
, call
);
7184 gimple_call_set_lhs (call
, new_temp
);
7185 gimple_call_set_nothrow (call
, true);
7186 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7191 tree mask
= NULL_TREE
;
7192 /* When combining two masks check if either of them is elsewhere
7193 combined with a loop mask, if that's the case we can mark that the
7194 new combined mask doesn't need to be combined with a loop mask. */
7196 && code
== BIT_AND_EXPR
7197 && VECTOR_BOOLEAN_TYPE_P (vectype
))
7199 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
7202 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7203 vec_num
* ncopies
, vectype
, i
);
7205 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7209 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
7212 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7213 vec_num
* ncopies
, vectype
, i
);
7215 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7220 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
7221 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7222 gimple_assign_set_lhs (new_stmt
, new_temp
);
7223 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7224 if (using_emulated_vectors_p
)
7225 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
7227 /* Enter the combined value into the vector cond hash so we don't
7228 AND it with a loop mask again. */
7230 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
7235 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
7236 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
7238 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
7239 gimple_assign_set_lhs (new_stmt
, new_temp
);
7240 vect_finish_stmt_generation (vinfo
, stmt_info
,
7245 slp_node
->push_vec_def (new_stmt
);
7247 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7251 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7253 vec_oprnds0
.release ();
7254 vec_oprnds1
.release ();
7255 vec_oprnds2
.release ();
7260 /* A helper function to ensure data reference DR_INFO's base alignment. */
7263 ensure_base_align (dr_vec_info
*dr_info
)
7265 /* Alignment is only analyzed for the first element of a DR group,
7266 use that to look at base alignment we need to enforce. */
7267 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
7268 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
7270 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
7272 if (dr_info
->base_misaligned
)
7274 tree base_decl
= dr_info
->base_decl
;
7276 // We should only be able to increase the alignment of a base object if
7277 // we know what its new alignment should be at compile time.
7278 unsigned HOST_WIDE_INT align_base_to
=
7279 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
7281 if (decl_in_symtab_p (base_decl
))
7282 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
7283 else if (DECL_ALIGN (base_decl
) < align_base_to
)
7285 SET_DECL_ALIGN (base_decl
, align_base_to
);
7286 DECL_USER_ALIGN (base_decl
) = 1;
7288 dr_info
->base_misaligned
= false;
7293 /* Function get_group_alias_ptr_type.
7295 Return the alias type for the group starting at FIRST_STMT_INFO. */
7298 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
7300 struct data_reference
*first_dr
, *next_dr
;
7302 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
7303 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
7304 while (next_stmt_info
)
7306 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
7307 if (get_alias_set (DR_REF (first_dr
))
7308 != get_alias_set (DR_REF (next_dr
)))
7310 if (dump_enabled_p ())
7311 dump_printf_loc (MSG_NOTE
, vect_location
,
7312 "conflicting alias set types.\n");
7313 return ptr_type_node
;
7315 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7317 return reference_alias_ptr_type (DR_REF (first_dr
));
7321 /* Function scan_operand_equal_p.
7323 Helper function for check_scan_store. Compare two references
7324 with .GOMP_SIMD_LANE bases. */
7327 scan_operand_equal_p (tree ref1
, tree ref2
)
7329 tree ref
[2] = { ref1
, ref2
};
7330 poly_int64 bitsize
[2], bitpos
[2];
7331 tree offset
[2], base
[2];
7332 for (int i
= 0; i
< 2; ++i
)
7335 int unsignedp
, reversep
, volatilep
= 0;
7336 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
7337 &offset
[i
], &mode
, &unsignedp
,
7338 &reversep
, &volatilep
);
7339 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
7341 if (TREE_CODE (base
[i
]) == MEM_REF
7342 && offset
[i
] == NULL_TREE
7343 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
7345 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
7346 if (is_gimple_assign (def_stmt
)
7347 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
7348 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
7349 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
7351 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
7353 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
7354 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
7359 if (!operand_equal_p (base
[0], base
[1], 0))
7361 if (maybe_ne (bitsize
[0], bitsize
[1]))
7363 if (offset
[0] != offset
[1])
7365 if (!offset
[0] || !offset
[1])
7367 if (!operand_equal_p (offset
[0], offset
[1], 0))
7370 for (int i
= 0; i
< 2; ++i
)
7372 step
[i
] = integer_one_node
;
7373 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7375 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7376 if (is_gimple_assign (def_stmt
)
7377 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
7378 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
7381 step
[i
] = gimple_assign_rhs2 (def_stmt
);
7382 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
7385 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
7387 step
[i
] = TREE_OPERAND (offset
[i
], 1);
7388 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
7390 tree rhs1
= NULL_TREE
;
7391 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7393 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7394 if (gimple_assign_cast_p (def_stmt
))
7395 rhs1
= gimple_assign_rhs1 (def_stmt
);
7397 else if (CONVERT_EXPR_P (offset
[i
]))
7398 rhs1
= TREE_OPERAND (offset
[i
], 0);
7400 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
7401 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
7402 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
7403 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
7406 if (!operand_equal_p (offset
[0], offset
[1], 0)
7407 || !operand_equal_p (step
[0], step
[1], 0))
7415 enum scan_store_kind
{
7416 /* Normal permutation. */
7417 scan_store_kind_perm
,
7419 /* Whole vector left shift permutation with zero init. */
7420 scan_store_kind_lshift_zero
,
7422 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7423 scan_store_kind_lshift_cond
7426 /* Function check_scan_store.
7428 Verify if we can perform the needed permutations or whole vector shifts.
7429 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7430 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7431 to do at each step. */
7434 scan_store_can_perm_p (tree vectype
, tree init
,
7435 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
7437 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7438 unsigned HOST_WIDE_INT nunits
;
7439 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7441 int units_log2
= exact_log2 (nunits
);
7442 if (units_log2
<= 0)
7446 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
7447 for (i
= 0; i
<= units_log2
; ++i
)
7449 unsigned HOST_WIDE_INT j
, k
;
7450 enum scan_store_kind kind
= scan_store_kind_perm
;
7451 vec_perm_builder
sel (nunits
, nunits
, 1);
7452 sel
.quick_grow (nunits
);
7453 if (i
== units_log2
)
7455 for (j
= 0; j
< nunits
; ++j
)
7456 sel
[j
] = nunits
- 1;
7460 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7462 for (k
= 0; j
< nunits
; ++j
, ++k
)
7463 sel
[j
] = nunits
+ k
;
7465 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7466 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
7468 if (i
== units_log2
)
7471 if (whole_vector_shift_kind
== scan_store_kind_perm
)
7473 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
7475 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
7476 /* Whole vector shifts shift in zeros, so if init is all zero
7477 constant, there is no need to do anything further. */
7478 if ((TREE_CODE (init
) != INTEGER_CST
7479 && TREE_CODE (init
) != REAL_CST
)
7480 || !initializer_zerop (init
))
7482 tree masktype
= truth_type_for (vectype
);
7483 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
7485 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
7488 kind
= whole_vector_shift_kind
;
7490 if (use_whole_vector
)
7492 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
7493 use_whole_vector
->safe_grow_cleared (i
, true);
7494 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7495 use_whole_vector
->safe_push (kind
);
7503 /* Function check_scan_store.
7505 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7508 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7509 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7510 vect_memory_access_type memory_access_type
)
7512 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7513 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7516 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7519 || memory_access_type
!= VMAT_CONTIGUOUS
7520 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7521 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7522 || loop_vinfo
== NULL
7523 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7524 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7525 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7526 || !integer_zerop (DR_INIT (dr_info
->dr
))
7527 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7528 || !alias_sets_conflict_p (get_alias_set (vectype
),
7529 get_alias_set (TREE_TYPE (ref_type
))))
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7533 "unsupported OpenMP scan store.\n");
7537 /* We need to pattern match code built by OpenMP lowering and simplified
7538 by following optimizations into something we can handle.
7539 #pragma omp simd reduction(inscan,+:r)
7543 #pragma omp scan inclusive (r)
7546 shall have body with:
7547 // Initialization for input phase, store the reduction initializer:
7548 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7549 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7551 // Actual input phase:
7553 r.0_5 = D.2042[_20];
7556 // Initialization for scan phase:
7557 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7563 // Actual scan phase:
7565 r.1_8 = D.2042[_20];
7567 The "omp simd array" variable D.2042 holds the privatized copy used
7568 inside of the loop and D.2043 is another one that holds copies of
7569 the current original list item. The separate GOMP_SIMD_LANE ifn
7570 kinds are there in order to allow optimizing the initializer store
7571 and combiner sequence, e.g. if it is originally some C++ish user
7572 defined reduction, but allow the vectorizer to pattern recognize it
7573 and turn into the appropriate vectorized scan.
7575 For exclusive scan, this is slightly different:
7576 #pragma omp simd reduction(inscan,+:r)
7580 #pragma omp scan exclusive (r)
7583 shall have body with:
7584 // Initialization for input phase, store the reduction initializer:
7585 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7586 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7588 // Actual input phase:
7590 r.0_5 = D.2042[_20];
7593 // Initialization for scan phase:
7594 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7600 // Actual scan phase:
7602 r.1_8 = D.2044[_20];
7605 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7607 /* Match the D.2042[_21] = 0; store above. Just require that
7608 it is a constant or external definition store. */
7609 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7614 "unsupported OpenMP scan initializer store.\n");
7618 if (! loop_vinfo
->scan_map
)
7619 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7620 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7621 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7624 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7626 /* These stores can be vectorized normally. */
7630 if (rhs_dt
!= vect_internal_def
)
7633 if (dump_enabled_p ())
7634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7635 "unsupported OpenMP scan combiner pattern.\n");
7639 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7640 tree rhs
= gimple_assign_rhs1 (stmt
);
7641 if (TREE_CODE (rhs
) != SSA_NAME
)
7644 gimple
*other_store_stmt
= NULL
;
7645 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7646 bool inscan_var_store
7647 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7649 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7651 if (!inscan_var_store
)
7653 use_operand_p use_p
;
7654 imm_use_iterator iter
;
7655 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7657 gimple
*use_stmt
= USE_STMT (use_p
);
7658 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7660 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7661 || !is_gimple_assign (use_stmt
)
7662 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7664 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7666 other_store_stmt
= use_stmt
;
7668 if (other_store_stmt
== NULL
)
7670 rhs
= gimple_assign_lhs (other_store_stmt
);
7671 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7675 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7677 use_operand_p use_p
;
7678 imm_use_iterator iter
;
7679 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7681 gimple
*use_stmt
= USE_STMT (use_p
);
7682 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7684 if (other_store_stmt
)
7686 other_store_stmt
= use_stmt
;
7692 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7693 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
7694 || !is_gimple_assign (def_stmt
)
7695 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
7698 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7699 /* For pointer addition, we should use the normal plus for the vector
7703 case POINTER_PLUS_EXPR
:
7706 case MULT_HIGHPART_EXPR
:
7711 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
7714 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7715 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7716 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
7719 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7720 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7721 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7722 || !gimple_assign_load_p (load1_stmt
)
7723 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7724 || !gimple_assign_load_p (load2_stmt
))
7727 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7728 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7729 if (load1_stmt_info
== NULL
7730 || load2_stmt_info
== NULL
7731 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7732 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7733 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7734 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7737 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7739 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7740 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7741 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7743 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7745 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7749 use_operand_p use_p
;
7750 imm_use_iterator iter
;
7751 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7753 gimple
*use_stmt
= USE_STMT (use_p
);
7754 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7756 if (other_store_stmt
)
7758 other_store_stmt
= use_stmt
;
7762 if (other_store_stmt
== NULL
)
7764 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7765 || !gimple_store_p (other_store_stmt
))
7768 stmt_vec_info other_store_stmt_info
7769 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7770 if (other_store_stmt_info
== NULL
7771 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7772 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7775 gimple
*stmt1
= stmt
;
7776 gimple
*stmt2
= other_store_stmt
;
7777 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7778 std::swap (stmt1
, stmt2
);
7779 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7780 gimple_assign_rhs1 (load2_stmt
)))
7782 std::swap (rhs1
, rhs2
);
7783 std::swap (load1_stmt
, load2_stmt
);
7784 std::swap (load1_stmt_info
, load2_stmt_info
);
7786 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7787 gimple_assign_rhs1 (load1_stmt
)))
7790 tree var3
= NULL_TREE
;
7791 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7792 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7793 gimple_assign_rhs1 (load2_stmt
)))
7795 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7797 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7798 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7799 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7801 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7802 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7803 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7804 || lookup_attribute ("omp simd inscan exclusive",
7805 DECL_ATTRIBUTES (var3
)))
7809 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7810 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7811 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7814 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7815 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7816 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7817 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7818 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7819 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7822 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7823 std::swap (var1
, var2
);
7825 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7827 if (!lookup_attribute ("omp simd inscan exclusive",
7828 DECL_ATTRIBUTES (var1
)))
7833 if (loop_vinfo
->scan_map
== NULL
)
7835 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7839 /* The IL is as expected, now check if we can actually vectorize it.
7846 should be vectorized as (where _40 is the vectorized rhs
7847 from the D.2042[_21] = 0; store):
7848 _30 = MEM <vector(8) int> [(int *)&D.2043];
7849 _31 = MEM <vector(8) int> [(int *)&D.2042];
7850 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7852 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7853 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7855 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7856 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7857 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7859 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7860 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7862 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7863 MEM <vector(8) int> [(int *)&D.2043] = _39;
7864 MEM <vector(8) int> [(int *)&D.2042] = _38;
7871 should be vectorized as (where _40 is the vectorized rhs
7872 from the D.2042[_21] = 0; store):
7873 _30 = MEM <vector(8) int> [(int *)&D.2043];
7874 _31 = MEM <vector(8) int> [(int *)&D.2042];
7875 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7876 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7878 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7879 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7880 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7882 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7883 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7884 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7886 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7887 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7890 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7891 MEM <vector(8) int> [(int *)&D.2044] = _39;
7892 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7893 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7894 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7895 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7898 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7899 if (units_log2
== -1)
7906 /* Function vectorizable_scan_store.
7908 Helper of vectorizable_store, arguments like on vectorizable_store.
7909 Handle only the transformation, checking is done in check_scan_store. */
7912 vectorizable_scan_store (vec_info
*vinfo
,
7913 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7914 gimple
**vec_stmt
, int ncopies
)
7916 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7917 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7918 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7919 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_NOTE
, vect_location
,
7923 "transform scan store. ncopies = %d\n", ncopies
);
7925 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7926 tree rhs
= gimple_assign_rhs1 (stmt
);
7927 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7929 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7930 bool inscan_var_store
7931 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7933 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7935 use_operand_p use_p
;
7936 imm_use_iterator iter
;
7937 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7939 gimple
*use_stmt
= USE_STMT (use_p
);
7940 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7942 rhs
= gimple_assign_lhs (use_stmt
);
7947 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7948 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7949 if (code
== POINTER_PLUS_EXPR
)
7951 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7952 && commutative_tree_code (code
));
7953 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7954 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7955 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7956 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7957 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7958 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7959 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7960 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7961 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7962 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7963 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7965 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7967 std::swap (rhs1
, rhs2
);
7968 std::swap (var1
, var2
);
7969 std::swap (load1_dr_info
, load2_dr_info
);
7972 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7975 unsigned HOST_WIDE_INT nunits
;
7976 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7978 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7979 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7980 gcc_assert (units_log2
> 0);
7981 auto_vec
<tree
, 16> perms
;
7982 perms
.quick_grow (units_log2
+ 1);
7983 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7984 for (int i
= 0; i
<= units_log2
; ++i
)
7986 unsigned HOST_WIDE_INT j
, k
;
7987 vec_perm_builder
sel (nunits
, nunits
, 1);
7988 sel
.quick_grow (nunits
);
7989 if (i
== units_log2
)
7990 for (j
= 0; j
< nunits
; ++j
)
7991 sel
[j
] = nunits
- 1;
7994 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7996 for (k
= 0; j
< nunits
; ++j
, ++k
)
7997 sel
[j
] = nunits
+ k
;
7999 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
8000 if (!use_whole_vector
.is_empty ()
8001 && use_whole_vector
[i
] != scan_store_kind_perm
)
8003 if (zero_vec
== NULL_TREE
)
8004 zero_vec
= build_zero_cst (vectype
);
8005 if (masktype
== NULL_TREE
8006 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
8007 masktype
= truth_type_for (vectype
);
8008 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
8011 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
8014 tree vec_oprnd1
= NULL_TREE
;
8015 tree vec_oprnd2
= NULL_TREE
;
8016 tree vec_oprnd3
= NULL_TREE
;
8017 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
8018 tree dataref_offset
= build_int_cst (ref_type
, 0);
8019 tree bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
,
8020 vectype
, VMAT_CONTIGUOUS
);
8021 tree ldataref_ptr
= NULL_TREE
;
8022 tree orig
= NULL_TREE
;
8023 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
8024 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
8025 auto_vec
<tree
> vec_oprnds1
;
8026 auto_vec
<tree
> vec_oprnds2
;
8027 auto_vec
<tree
> vec_oprnds3
;
8028 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
8029 *init
, &vec_oprnds1
,
8030 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
8031 rhs2
, &vec_oprnds3
);
8032 for (int j
= 0; j
< ncopies
; j
++)
8034 vec_oprnd1
= vec_oprnds1
[j
];
8035 if (ldataref_ptr
== NULL
)
8036 vec_oprnd2
= vec_oprnds2
[j
];
8037 vec_oprnd3
= vec_oprnds3
[j
];
8040 else if (!inscan_var_store
)
8041 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8045 vec_oprnd2
= make_ssa_name (vectype
);
8046 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8047 unshare_expr (ldataref_ptr
),
8049 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
8050 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
8051 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8052 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8053 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8056 tree v
= vec_oprnd2
;
8057 for (int i
= 0; i
< units_log2
; ++i
)
8059 tree new_temp
= make_ssa_name (vectype
);
8060 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
8062 && (use_whole_vector
[i
]
8063 != scan_store_kind_perm
))
8064 ? zero_vec
: vec_oprnd1
, v
,
8066 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8067 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8068 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8070 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
8072 /* Whole vector shift shifted in zero bits, but if *init
8073 is not initializer_zerop, we need to replace those elements
8074 with elements from vec_oprnd1. */
8075 tree_vector_builder
vb (masktype
, nunits
, 1);
8076 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
8077 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
8078 ? boolean_false_node
: boolean_true_node
);
8080 tree new_temp2
= make_ssa_name (vectype
);
8081 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
8082 new_temp
, vec_oprnd1
);
8083 vect_finish_stmt_generation (vinfo
, stmt_info
,
8085 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8086 new_temp
= new_temp2
;
8089 /* For exclusive scan, perform the perms[i] permutation once
8092 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
8100 tree new_temp2
= make_ssa_name (vectype
);
8101 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
8102 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8103 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8108 tree new_temp
= make_ssa_name (vectype
);
8109 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
8110 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8111 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8113 tree last_perm_arg
= new_temp
;
8114 /* For exclusive scan, new_temp computed above is the exclusive scan
8115 prefix sum. Turn it into inclusive prefix sum for the broadcast
8116 of the last element into orig. */
8117 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
8119 last_perm_arg
= make_ssa_name (vectype
);
8120 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
8121 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8122 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8125 orig
= make_ssa_name (vectype
);
8126 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
8127 last_perm_arg
, perms
[units_log2
]);
8128 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8129 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8131 if (!inscan_var_store
)
8133 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8134 unshare_expr (dataref_ptr
),
8136 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8137 g
= gimple_build_assign (data_ref
, new_temp
);
8138 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8139 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8143 if (inscan_var_store
)
8144 for (int j
= 0; j
< ncopies
; j
++)
8147 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8149 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8150 unshare_expr (dataref_ptr
),
8152 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8153 gimple
*g
= gimple_build_assign (data_ref
, orig
);
8154 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8155 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8161 /* Function vectorizable_store.
8163 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8164 that can be vectorized.
8165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8167 Return true if STMT_INFO is vectorizable in this way. */
8170 vectorizable_store (vec_info
*vinfo
,
8171 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8172 gimple
**vec_stmt
, slp_tree slp_node
,
8173 stmt_vector_for_cost
*cost_vec
)
8176 tree vec_oprnd
= NULL_TREE
;
8178 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8179 class loop
*loop
= NULL
;
8180 machine_mode vec_mode
;
8182 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
8183 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8184 tree dataref_ptr
= NULL_TREE
;
8185 tree dataref_offset
= NULL_TREE
;
8186 gimple
*ptr_incr
= NULL
;
8189 stmt_vec_info first_stmt_info
;
8191 unsigned int group_size
, i
;
8192 bool slp
= (slp_node
!= NULL
);
8193 unsigned int vec_num
;
8194 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8196 gather_scatter_info gs_info
;
8198 vec_load_store_type vls_type
;
8201 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8204 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8208 /* Is vectorizable store? */
8210 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8211 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8213 tree scalar_dest
= gimple_assign_lhs (assign
);
8214 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8215 && is_pattern_stmt_p (stmt_info
))
8216 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8217 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8218 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8219 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8220 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8221 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8222 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8223 && TREE_CODE (scalar_dest
) != MEM_REF
)
8228 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8229 if (!call
|| !gimple_call_internal_p (call
))
8232 internal_fn ifn
= gimple_call_internal_fn (call
);
8233 if (!internal_store_fn_p (ifn
))
8236 int mask_index
= internal_fn_mask_index (ifn
);
8237 if (mask_index
>= 0 && slp_node
)
8238 mask_index
= vect_slp_child_index_for_operand
8239 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8241 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8242 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8246 /* Cannot have hybrid store SLP -- that would mean storing to the
8247 same location twice. */
8248 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8250 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8251 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8255 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8256 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8261 /* Multiple types in SLP are handled by creating the appropriate number of
8262 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8267 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8269 gcc_assert (ncopies
>= 1);
8271 /* FORNOW. This restriction should be relaxed. */
8272 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
8274 if (dump_enabled_p ())
8275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8276 "multiple types in nested loop.\n");
8282 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8283 &op
, &op_node
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8286 elem_type
= TREE_TYPE (vectype
);
8287 vec_mode
= TYPE_MODE (vectype
);
8289 if (!STMT_VINFO_DATA_REF (stmt_info
))
8292 vect_memory_access_type memory_access_type
;
8293 enum dr_alignment_support alignment_support_scheme
;
8296 internal_fn lanes_ifn
;
8297 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8298 ncopies
, &memory_access_type
, &poffset
,
8299 &alignment_support_scheme
, &misalignment
, &gs_info
,
8305 if (memory_access_type
== VMAT_CONTIGUOUS
)
8307 if (!VECTOR_MODE_P (vec_mode
)
8308 || !can_vec_mask_load_store_p (vec_mode
,
8309 TYPE_MODE (mask_vectype
), false))
8312 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8313 && (memory_access_type
!= VMAT_GATHER_SCATTER
8314 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8316 if (dump_enabled_p ())
8317 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8318 "unsupported access type for masked store.\n");
8321 else if (memory_access_type
== VMAT_GATHER_SCATTER
8322 && gs_info
.ifn
== IFN_LAST
8325 if (dump_enabled_p ())
8326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8327 "unsupported masked emulated scatter.\n");
8333 /* FORNOW. In some cases can vectorize even if data-type not supported
8334 (e.g. - array initialization with 0). */
8335 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8339 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8340 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8341 && memory_access_type
!= VMAT_GATHER_SCATTER
8342 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8345 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8346 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8347 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8351 first_stmt_info
= stmt_info
;
8352 first_dr_info
= dr_info
;
8353 group_size
= vec_num
= 1;
8356 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8358 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8359 memory_access_type
))
8363 bool costing_p
= !vec_stmt
;
8364 if (costing_p
) /* transformation not required. */
8366 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8369 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8370 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8371 vls_type
, group_size
,
8372 memory_access_type
, &gs_info
,
8376 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8379 if (dump_enabled_p ())
8380 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8381 "incompatible vector types for invariants\n");
8385 if (dump_enabled_p ()
8386 && memory_access_type
!= VMAT_ELEMENTWISE
8387 && memory_access_type
!= VMAT_GATHER_SCATTER
8388 && alignment_support_scheme
!= dr_aligned
)
8389 dump_printf_loc (MSG_NOTE
, vect_location
,
8390 "Vectorizing an unaligned access.\n");
8392 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
8394 /* As function vect_transform_stmt shows, for interleaving stores
8395 the whole chain is vectorized when the last store in the chain
8396 is reached, the other stores in the group are skipped. So we
8397 want to only cost the last one here, but it's not trivial to
8398 get the last, as it's equivalent to use the first one for
8399 costing, use the first one instead. */
8402 && first_stmt_info
!= stmt_info
)
8405 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8409 ensure_base_align (dr_info
);
8411 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8413 vect_build_scatter_store_calls (vinfo
, stmt_info
, gsi
, vec_stmt
, &gs_info
,
8417 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8419 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
);
8423 unsigned int inside_cost
= 0, prologue_cost
= 0;
8424 if (vls_type
== VLS_STORE_INVARIANT
)
8425 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8426 stmt_info
, 0, vect_prologue
);
8427 vect_get_store_cost (vinfo
, stmt_info
, ncopies
,
8428 alignment_support_scheme
, misalignment
,
8429 &inside_cost
, cost_vec
);
8431 if (dump_enabled_p ())
8432 dump_printf_loc (MSG_NOTE
, vect_location
,
8433 "vect_model_store_cost: inside_cost = %d, "
8434 "prologue_cost = %d .\n",
8435 inside_cost
, prologue_cost
);
8439 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8445 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8449 grouped_store
= false;
8450 /* VEC_NUM is the number of vect stmts to be created for this
8452 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8453 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8454 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8455 == first_stmt_info
);
8456 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8457 op
= vect_get_store_rhs (first_stmt_info
);
8460 /* VEC_NUM is the number of vect stmts to be created for this
8462 vec_num
= group_size
;
8464 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8467 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8469 if (!costing_p
&& dump_enabled_p ())
8470 dump_printf_loc (MSG_NOTE
, vect_location
, "transform store. ncopies = %d\n",
8473 /* Check if we need to update prologue cost for invariant,
8474 and update it accordingly if so. If it's not for
8475 interleaving store, we can just check vls_type; but if
8476 it's for interleaving store, need to check the def_type
8477 of the stored value since the current vls_type is just
8478 for first_stmt_info. */
8479 auto update_prologue_cost
= [&](unsigned *prologue_cost
, tree store_rhs
)
8481 gcc_assert (costing_p
);
8486 gcc_assert (store_rhs
);
8487 enum vect_def_type cdt
;
8488 gcc_assert (vect_is_simple_use (store_rhs
, vinfo
, &cdt
));
8489 if (cdt
!= vect_constant_def
&& cdt
!= vect_external_def
)
8492 else if (vls_type
!= VLS_STORE_INVARIANT
)
8494 *prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
,
8498 if (memory_access_type
== VMAT_ELEMENTWISE
8499 || memory_access_type
== VMAT_STRIDED_SLP
)
8501 unsigned inside_cost
= 0, prologue_cost
= 0;
8502 gimple_stmt_iterator incr_gsi
;
8508 tree stride_base
, stride_step
, alias_off
;
8509 tree vec_oprnd
= NULL_TREE
;
8512 /* Checked by get_load_store_type. */
8513 unsigned int const_nunits
= nunits
.to_constant ();
8515 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8516 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8518 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8520 = fold_build_pointer_plus
8521 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8522 size_binop (PLUS_EXPR
,
8523 convert_to_ptrofftype (dr_offset
),
8524 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8525 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8527 /* For a store with loop-invariant (but other than power-of-2)
8528 stride (i.e. not a grouped access) like so:
8530 for (i = 0; i < n; i += stride)
8533 we generate a new induction variable and new stores from
8534 the components of the (vectorized) rhs:
8536 for (j = 0; ; j += VF*stride)
8541 array[j + stride] = tmp2;
8545 unsigned nstores
= const_nunits
;
8547 tree ltype
= elem_type
;
8548 tree lvectype
= vectype
;
8551 if (group_size
< const_nunits
8552 && const_nunits
% group_size
== 0)
8554 nstores
= const_nunits
/ group_size
;
8556 ltype
= build_vector_type (elem_type
, group_size
);
8559 /* First check if vec_extract optab doesn't support extraction
8560 of vector elts directly. */
8561 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8563 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8564 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8565 group_size
).exists (&vmode
)
8566 || (convert_optab_handler (vec_extract_optab
,
8567 TYPE_MODE (vectype
), vmode
)
8568 == CODE_FOR_nothing
))
8570 /* Try to avoid emitting an extract of vector elements
8571 by performing the extracts using an integer type of the
8572 same size, extracting from a vector of those and then
8573 re-interpreting it as the original vector type if
8576 = group_size
* GET_MODE_BITSIZE (elmode
);
8577 unsigned int lnunits
= const_nunits
/ group_size
;
8578 /* If we can't construct such a vector fall back to
8579 element extracts from the original vector type and
8580 element size stores. */
8581 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8582 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8583 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8584 lnunits
).exists (&vmode
)
8585 && (convert_optab_handler (vec_extract_optab
,
8587 != CODE_FOR_nothing
))
8591 ltype
= build_nonstandard_integer_type (lsize
, 1);
8592 lvectype
= build_vector_type (ltype
, nstores
);
8594 /* Else fall back to vector extraction anyway.
8595 Fewer stores are more important than avoiding spilling
8596 of the vector we extract from. Compared to the
8597 construction case in vectorizable_load no store-forwarding
8598 issue exists here for reasonable archs. */
8601 else if (group_size
>= const_nunits
8602 && group_size
% const_nunits
== 0)
8604 int mis_align
= dr_misalignment (first_dr_info
, vectype
);
8605 dr_alignment_support dr_align
8606 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
,
8608 if (dr_align
== dr_aligned
8609 || dr_align
== dr_unaligned_supported
)
8612 lnel
= const_nunits
;
8615 alignment_support_scheme
= dr_align
;
8616 misalignment
= mis_align
;
8619 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8620 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8625 ivstep
= stride_step
;
8626 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8627 build_int_cst (TREE_TYPE (ivstep
), vf
));
8629 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8631 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8632 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8633 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
, loop
, &incr_gsi
,
8634 insert_after
, &offvar
, NULL
);
8635 incr
= gsi_stmt (incr_gsi
);
8637 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8640 alias_off
= build_int_cst (ref_type
, 0);
8641 stmt_vec_info next_stmt_info
= first_stmt_info
;
8642 auto_vec
<tree
> vec_oprnds (ncopies
);
8643 /* For costing some adjacent vector stores, we'd like to cost with
8644 the total number of them once instead of cost each one by one. */
8645 unsigned int n_adjacent_stores
= 0;
8646 for (g
= 0; g
< group_size
; g
++)
8648 running_off
= offvar
;
8653 tree size
= TYPE_SIZE_UNIT (ltype
);
8655 = fold_build2 (MULT_EXPR
, sizetype
, size_int (g
), size
);
8656 tree newoff
= copy_ssa_name (running_off
, NULL
);
8657 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8659 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8660 running_off
= newoff
;
8664 op
= vect_get_store_rhs (next_stmt_info
);
8666 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
, op
,
8669 update_prologue_cost (&prologue_cost
, op
);
8670 unsigned int group_el
= 0;
8671 unsigned HOST_WIDE_INT
8672 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8673 for (j
= 0; j
< ncopies
; j
++)
8677 vec_oprnd
= vec_oprnds
[j
];
8678 /* Pun the vector to extract from if necessary. */
8679 if (lvectype
!= vectype
)
8681 tree tem
= make_ssa_name (lvectype
);
8683 = build1 (VIEW_CONVERT_EXPR
, lvectype
, vec_oprnd
);
8684 gimple
*pun
= gimple_build_assign (tem
, cvt
);
8685 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8689 for (i
= 0; i
< nstores
; i
++)
8693 /* Only need vector extracting when there are more
8697 += record_stmt_cost (cost_vec
, 1, vec_to_scalar
,
8698 stmt_info
, 0, vect_body
);
8699 /* Take a single lane vector type store as scalar
8700 store to avoid ICE like 110776. */
8701 if (VECTOR_TYPE_P (ltype
)
8702 && known_ne (TYPE_VECTOR_SUBPARTS (ltype
), 1U))
8703 n_adjacent_stores
++;
8706 += record_stmt_cost (cost_vec
, 1, scalar_store
,
8707 stmt_info
, 0, vect_body
);
8710 tree newref
, newoff
;
8711 gimple
*incr
, *assign
;
8712 tree size
= TYPE_SIZE (ltype
);
8713 /* Extract the i'th component. */
8714 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8715 bitsize_int (i
), size
);
8716 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8719 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8723 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8725 newref
= build2 (MEM_REF
, ltype
,
8726 running_off
, this_off
);
8727 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8729 /* And store it to *running_off. */
8730 assign
= gimple_build_assign (newref
, elem
);
8731 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8735 || group_el
== group_size
)
8737 newoff
= copy_ssa_name (running_off
, NULL
);
8738 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8739 running_off
, stride_step
);
8740 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8742 running_off
= newoff
;
8745 if (g
== group_size
- 1
8748 if (j
== 0 && i
== 0)
8750 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8754 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8755 vec_oprnds
.truncate(0);
8762 if (n_adjacent_stores
> 0)
8763 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8764 alignment_support_scheme
, misalignment
,
8765 &inside_cost
, cost_vec
);
8766 if (dump_enabled_p ())
8767 dump_printf_loc (MSG_NOTE
, vect_location
,
8768 "vect_model_store_cost: inside_cost = %d, "
8769 "prologue_cost = %d .\n",
8770 inside_cost
, prologue_cost
);
8776 gcc_assert (alignment_support_scheme
);
8777 vec_loop_masks
*loop_masks
8778 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8779 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8781 vec_loop_lens
*loop_lens
8782 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8783 ? &LOOP_VINFO_LENS (loop_vinfo
)
8786 /* Shouldn't go with length-based approach if fully masked. */
8787 gcc_assert (!loop_lens
|| !loop_masks
);
8789 /* Targets with store-lane instructions must not require explicit
8790 realignment. vect_supportable_dr_alignment always returns either
8791 dr_aligned or dr_unaligned_supported for masked operations. */
8792 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8795 || alignment_support_scheme
== dr_aligned
8796 || alignment_support_scheme
== dr_unaligned_supported
);
8798 tree offset
= NULL_TREE
;
8799 if (!known_eq (poffset
, 0))
8800 offset
= size_int (poffset
);
8803 tree vec_offset
= NULL_TREE
;
8804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8806 aggr_type
= NULL_TREE
;
8809 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8811 aggr_type
= elem_type
;
8813 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8814 &bump
, &vec_offset
, loop_lens
);
8818 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8819 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8821 aggr_type
= vectype
;
8822 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8823 memory_access_type
, loop_lens
);
8826 if (mask
&& !costing_p
)
8827 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8829 /* In case the vectorization factor (VF) is bigger than the number
8830 of elements that we can fit in a vectype (nunits), we have to generate
8831 more than one vector stmt - i.e - we need to "unroll" the
8832 vector stmt by a factor VF/nunits. */
8834 /* In case of interleaving (non-unit grouped access):
8841 We create vectorized stores starting from base address (the access of the
8842 first stmt in the chain (S2 in the above example), when the last store stmt
8843 of the chain (S4) is reached:
8846 VS2: &base + vec_size*1 = vx0
8847 VS3: &base + vec_size*2 = vx1
8848 VS4: &base + vec_size*3 = vx3
8850 Then permutation statements are generated:
8852 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8853 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8856 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8857 (the order of the data-refs in the output of vect_permute_store_chain
8858 corresponds to the order of scalar stmts in the interleaving chain - see
8859 the documentation of vect_permute_store_chain()).
8861 In case of both multiple types and interleaving, above vector stores and
8862 permutation stmts are created for every copy. The result vector stmts are
8863 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8864 STMT_VINFO_RELATED_STMT for the next copies.
8867 auto_vec
<tree
> dr_chain (group_size
);
8868 auto_vec
<tree
> vec_masks
;
8869 tree vec_mask
= NULL
;
8870 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8871 for (i
= 0; i
< group_size
; i
++)
8872 gvec_oprnds
.quick_push (new auto_vec
<tree
> (ncopies
));
8874 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8876 gcc_assert (!slp
&& grouped_store
);
8877 unsigned inside_cost
= 0, prologue_cost
= 0;
8878 /* For costing some adjacent vector stores, we'd like to cost with
8879 the total number of them once instead of cost each one by one. */
8880 unsigned int n_adjacent_stores
= 0;
8881 for (j
= 0; j
< ncopies
; j
++)
8886 /* For interleaved stores we collect vectorized defs for all
8887 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8888 as an input to vect_permute_store_chain(). */
8889 stmt_vec_info next_stmt_info
= first_stmt_info
;
8890 for (i
= 0; i
< group_size
; i
++)
8892 /* Since gaps are not supported for interleaved stores,
8893 DR_GROUP_SIZE is the exact number of stmts in the
8894 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8895 op
= vect_get_store_rhs (next_stmt_info
);
8897 update_prologue_cost (&prologue_cost
, op
);
8900 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8903 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8904 dr_chain
.quick_push (vec_oprnd
);
8906 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8913 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8916 vec_mask
= vec_masks
[0];
8919 /* We should have catched mismatched types earlier. */
8921 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
8923 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8924 aggr_type
, NULL
, offset
, &dummy
,
8925 gsi
, &ptr_incr
, false, bump
);
8928 else if (!costing_p
)
8930 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8931 /* DR_CHAIN is then used as an input to
8932 vect_permute_store_chain(). */
8933 for (i
= 0; i
< group_size
; i
++)
8935 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8936 dr_chain
[i
] = vec_oprnd
;
8939 vec_mask
= vec_masks
[j
];
8940 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8946 n_adjacent_stores
+= vec_num
;
8950 /* Get an array into which we can store the individual vectors. */
8951 tree vec_array
= create_vector_array (vectype
, vec_num
);
8953 /* Invalidate the current contents of VEC_ARRAY. This should
8954 become an RTL clobber too, which prevents the vector registers
8955 from being upward-exposed. */
8956 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8958 /* Store the individual vectors into the array. */
8959 for (i
= 0; i
< vec_num
; i
++)
8961 vec_oprnd
= dr_chain
[i
];
8962 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8966 tree final_mask
= NULL
;
8967 tree final_len
= NULL
;
8970 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8971 ncopies
, vectype
, j
);
8973 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8976 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8979 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8980 ncopies
, vectype
, j
, 1);
8982 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8984 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8985 bias
= build_int_cst (intQI_type_node
, biasval
);
8988 mask_vectype
= truth_type_for (vectype
);
8989 final_mask
= build_minus_one_cst (mask_vectype
);
8994 if (final_len
&& final_mask
)
8997 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8998 LEN, BIAS, VEC_ARRAY). */
8999 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9000 tree alias_ptr
= build_int_cst (ref_type
, align
);
9001 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
9002 dataref_ptr
, alias_ptr
,
9003 final_mask
, final_len
, bias
,
9006 else if (final_mask
)
9009 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9011 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9012 tree alias_ptr
= build_int_cst (ref_type
, align
);
9013 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
9014 dataref_ptr
, alias_ptr
,
9015 final_mask
, vec_array
);
9020 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9021 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9022 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
9023 gimple_call_set_lhs (call
, data_ref
);
9025 gimple_call_set_nothrow (call
, true);
9026 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9029 /* Record that VEC_ARRAY is now dead. */
9030 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9032 *vec_stmt
= new_stmt
;
9033 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9038 if (n_adjacent_stores
> 0)
9039 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9040 alignment_support_scheme
, misalignment
,
9041 &inside_cost
, cost_vec
);
9042 if (dump_enabled_p ())
9043 dump_printf_loc (MSG_NOTE
, vect_location
,
9044 "vect_model_store_cost: inside_cost = %d, "
9045 "prologue_cost = %d .\n",
9046 inside_cost
, prologue_cost
);
9052 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9054 gcc_assert (!slp
&& !grouped_store
);
9055 auto_vec
<tree
> vec_offsets
;
9056 unsigned int inside_cost
= 0, prologue_cost
= 0;
9057 for (j
= 0; j
< ncopies
; j
++)
9062 if (costing_p
&& vls_type
== VLS_STORE_INVARIANT
)
9063 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
9064 stmt_info
, 0, vect_prologue
);
9065 else if (!costing_p
)
9067 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9068 DR_CHAIN is of size 1. */
9069 gcc_assert (group_size
== 1);
9070 op
= vect_get_store_rhs (first_stmt_info
);
9071 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
,
9072 ncopies
, op
, gvec_oprnds
[0]);
9073 vec_oprnd
= (*gvec_oprnds
[0])[0];
9074 dr_chain
.quick_push (vec_oprnd
);
9077 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9080 vec_mask
= vec_masks
[0];
9083 /* We should have catched mismatched types earlier. */
9085 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
9086 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9087 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9089 &dataref_ptr
, &vec_offsets
);
9092 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
9093 aggr_type
, NULL
, offset
,
9094 &dummy
, gsi
, &ptr_incr
, false,
9098 else if (!costing_p
)
9100 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9101 vec_oprnd
= (*gvec_oprnds
[0])[j
];
9102 dr_chain
[0] = vec_oprnd
;
9104 vec_mask
= vec_masks
[j
];
9105 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9106 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9107 gsi
, stmt_info
, bump
);
9111 unsigned HOST_WIDE_INT align
;
9112 tree final_mask
= NULL_TREE
;
9113 tree final_len
= NULL_TREE
;
9114 tree bias
= NULL_TREE
;
9118 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9119 ncopies
, vectype
, j
);
9121 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9122 final_mask
, vec_mask
, gsi
);
9125 if (gs_info
.ifn
!= IFN_LAST
)
9129 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9131 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9132 stmt_info
, 0, vect_body
);
9136 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9137 vec_offset
= vec_offsets
[j
];
9138 tree scale
= size_int (gs_info
.scale
);
9140 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
9143 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9144 ncopies
, vectype
, j
, 1);
9146 final_len
= build_int_cst (sizetype
,
9147 TYPE_VECTOR_SUBPARTS (vectype
));
9149 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9150 bias
= build_int_cst (intQI_type_node
, biasval
);
9153 mask_vectype
= truth_type_for (vectype
);
9154 final_mask
= build_minus_one_cst (mask_vectype
);
9159 if (final_len
&& final_mask
)
9160 call
= gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE
,
9161 7, dataref_ptr
, vec_offset
,
9162 scale
, vec_oprnd
, final_mask
,
9164 else if (final_mask
)
9166 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE
, 5,
9167 dataref_ptr
, vec_offset
, scale
,
9168 vec_oprnd
, final_mask
);
9170 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9171 dataref_ptr
, vec_offset
,
9173 gimple_call_set_nothrow (call
, true);
9174 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9179 /* Emulated scatter. */
9180 gcc_assert (!final_mask
);
9183 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9184 /* For emulated scatter N offset vector element extracts
9185 (we assume the scalar scaling and ptr + offset add is
9186 consumed by the load). */
9188 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9189 stmt_info
, 0, vect_body
);
9190 /* N scalar stores plus extracting the elements. */
9192 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9193 stmt_info
, 0, vect_body
);
9195 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9196 stmt_info
, 0, vect_body
);
9200 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9201 unsigned HOST_WIDE_INT const_offset_nunits
9202 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9203 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9204 vec_alloc (ctor_elts
, const_nunits
);
9205 gimple_seq stmts
= NULL
;
9206 tree elt_type
= TREE_TYPE (vectype
);
9207 unsigned HOST_WIDE_INT elt_size
9208 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9209 /* We support offset vectors with more elements
9210 than the data vector for now. */
9211 unsigned HOST_WIDE_INT factor
9212 = const_offset_nunits
/ const_nunits
;
9213 vec_offset
= vec_offsets
[j
/ factor
];
9214 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9215 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9216 tree scale
= size_int (gs_info
.scale
);
9217 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9218 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9219 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9221 /* Compute the offsetted pointer. */
9222 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9223 bitsize_int (k
+ elt_offset
));
9225 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
, vec_offset
,
9226 TYPE_SIZE (idx_type
), boff
);
9227 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9228 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
, scale
);
9230 = gimple_build (&stmts
, PLUS_EXPR
, TREE_TYPE (dataref_ptr
),
9232 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9233 /* Extract the element to be stored. */
9235 = gimple_build (&stmts
, BIT_FIELD_REF
, TREE_TYPE (vectype
),
9236 vec_oprnd
, TYPE_SIZE (elt_type
),
9237 bitsize_int (k
* elt_size
));
9238 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9241 = build2 (MEM_REF
, ltype
, ptr
, build_int_cst (ref_type
, 0));
9242 new_stmt
= gimple_build_assign (ref
, elt
);
9243 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9247 *vec_stmt
= new_stmt
;
9248 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9251 if (costing_p
&& dump_enabled_p ())
9252 dump_printf_loc (MSG_NOTE
, vect_location
,
9253 "vect_model_store_cost: inside_cost = %d, "
9254 "prologue_cost = %d .\n",
9255 inside_cost
, prologue_cost
);
9260 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
9261 || memory_access_type
== VMAT_CONTIGUOUS_DOWN
9262 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
9263 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
);
9265 unsigned inside_cost
= 0, prologue_cost
= 0;
9266 /* For costing some adjacent vector stores, we'd like to cost with
9267 the total number of them once instead of cost each one by one. */
9268 unsigned int n_adjacent_stores
= 0;
9269 auto_vec
<tree
> result_chain (group_size
);
9270 auto_vec
<tree
, 1> vec_oprnds
;
9271 for (j
= 0; j
< ncopies
; j
++)
9276 if (slp
&& !costing_p
)
9278 /* Get vectorized arguments for SLP_NODE. */
9279 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9280 &vec_oprnds
, mask
, &vec_masks
);
9281 vec_oprnd
= vec_oprnds
[0];
9283 vec_mask
= vec_masks
[0];
9287 /* For interleaved stores we collect vectorized defs for all the
9288 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9289 input to vect_permute_store_chain().
9291 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9293 stmt_vec_info next_stmt_info
= first_stmt_info
;
9294 for (i
= 0; i
< group_size
; i
++)
9296 /* Since gaps are not supported for interleaved stores,
9297 DR_GROUP_SIZE is the exact number of stmts in the chain.
9298 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9299 that there is no interleaving, DR_GROUP_SIZE is 1,
9300 and only one iteration of the loop will be executed. */
9301 op
= vect_get_store_rhs (next_stmt_info
);
9303 update_prologue_cost (&prologue_cost
, op
);
9306 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
9309 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9310 dr_chain
.quick_push (vec_oprnd
);
9312 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9314 if (mask
&& !costing_p
)
9316 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9319 vec_mask
= vec_masks
[0];
9323 /* We should have catched mismatched types earlier. */
9324 gcc_assert (costing_p
9325 || useless_type_conversion_p (vectype
,
9326 TREE_TYPE (vec_oprnd
)));
9327 bool simd_lane_access_p
9328 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9330 && simd_lane_access_p
9332 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9333 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9334 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9335 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9336 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9337 get_alias_set (TREE_TYPE (ref_type
))))
9339 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9340 dataref_offset
= build_int_cst (ref_type
, 0);
9342 else if (!costing_p
)
9344 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9345 simd_lane_access_p
? loop
: NULL
,
9346 offset
, &dummy
, gsi
, &ptr_incr
,
9347 simd_lane_access_p
, bump
);
9349 else if (!costing_p
)
9351 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9352 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9353 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9355 for (i
= 0; i
< group_size
; i
++)
9357 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9358 dr_chain
[i
] = vec_oprnd
;
9361 vec_mask
= vec_masks
[j
];
9363 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9365 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9373 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
9376 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
9377 int nstmts
= ceil_log2 (group_size
) * group_size
;
9378 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
9379 stmt_info
, 0, vect_body
);
9380 if (dump_enabled_p ())
9381 dump_printf_loc (MSG_NOTE
, vect_location
,
9382 "vect_model_store_cost: "
9383 "strided group_size = %d .\n",
9387 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
9388 gsi
, &result_chain
);
9391 stmt_vec_info next_stmt_info
= first_stmt_info
;
9392 for (i
= 0; i
< vec_num
; i
++)
9397 vec_oprnd
= vec_oprnds
[i
];
9398 else if (grouped_store
)
9399 /* For grouped stores vectorized defs are interleaved in
9400 vect_permute_store_chain(). */
9401 vec_oprnd
= result_chain
[i
];
9404 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9407 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_perm
,
9408 stmt_info
, 0, vect_body
);
9411 tree perm_mask
= perm_mask_for_reverse (vectype
);
9412 tree perm_dest
= vect_create_destination_var (
9413 vect_get_store_rhs (stmt_info
), vectype
);
9414 tree new_temp
= make_ssa_name (perm_dest
);
9416 /* Generate the permute statement. */
9418 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9419 vec_oprnd
, perm_mask
);
9420 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
9423 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9424 vec_oprnd
= new_temp
;
9430 n_adjacent_stores
++;
9434 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9435 if (!next_stmt_info
)
9442 tree final_mask
= NULL_TREE
;
9443 tree final_len
= NULL_TREE
;
9444 tree bias
= NULL_TREE
;
9446 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9447 vec_num
* ncopies
, vectype
,
9449 if (slp
&& vec_mask
)
9450 vec_mask
= vec_masks
[i
];
9452 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9456 /* Bump the vector pointer. */
9457 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9461 unsigned HOST_WIDE_INT align
;
9462 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9463 if (alignment_support_scheme
== dr_aligned
)
9465 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9467 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9471 misalign
= misalignment
;
9472 if (dataref_offset
== NULL_TREE
9473 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9474 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9476 align
= least_bit_hwi (misalign
| align
);
9478 /* Compute IFN when LOOP_LENS or final_mask valid. */
9479 machine_mode vmode
= TYPE_MODE (vectype
);
9480 machine_mode new_vmode
= vmode
;
9481 internal_fn partial_ifn
= IFN_LAST
;
9484 opt_machine_mode new_ovmode
9485 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9486 new_vmode
= new_ovmode
.require ();
9488 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9489 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9490 vec_num
* ncopies
, vectype
,
9491 vec_num
* j
+ i
, factor
);
9493 else if (final_mask
)
9495 if (!can_vec_mask_load_store_p (
9496 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9501 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9505 /* Pass VF value to 'len' argument of
9506 MASK_LEN_STORE if LOOP_LENS is invalid. */
9507 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9511 /* Pass all ones value to 'mask' argument of
9512 MASK_LEN_STORE if final_mask is invalid. */
9513 mask_vectype
= truth_type_for (vectype
);
9514 final_mask
= build_minus_one_cst (mask_vectype
);
9520 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9522 bias
= build_int_cst (intQI_type_node
, biasval
);
9525 /* Arguments are ready. Create the new vector stmt. */
9529 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9530 /* Need conversion if it's wrapped with VnQI. */
9531 if (vmode
!= new_vmode
)
9534 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9536 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9537 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9539 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9540 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9544 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9545 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9546 dataref_ptr
, ptr
, final_mask
,
9547 final_len
, bias
, vec_oprnd
);
9549 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9550 dataref_ptr
, ptr
, final_len
,
9552 gimple_call_set_nothrow (call
, true);
9553 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9556 else if (final_mask
)
9558 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9560 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9561 ptr
, final_mask
, vec_oprnd
);
9562 gimple_call_set_nothrow (call
, true);
9563 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9569 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9570 dataref_offset
? dataref_offset
9571 : build_int_cst (ref_type
, 0));
9572 if (alignment_support_scheme
== dr_aligned
)
9575 TREE_TYPE (data_ref
)
9576 = build_aligned_type (TREE_TYPE (data_ref
),
9577 align
* BITS_PER_UNIT
);
9578 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9579 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9580 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9586 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9587 if (!next_stmt_info
)
9590 if (!slp
&& !costing_p
)
9593 *vec_stmt
= new_stmt
;
9594 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9600 if (n_adjacent_stores
> 0)
9601 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9602 alignment_support_scheme
, misalignment
,
9603 &inside_cost
, cost_vec
);
9605 /* When vectorizing a store into the function result assign
9606 a penalty if the function returns in a multi-register location.
9607 In this case we assume we'll end up with having to spill the
9608 vector result and do piecewise loads as a conservative estimate. */
9609 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
9611 && (TREE_CODE (base
) == RESULT_DECL
9612 || (DECL_P (base
) && cfun_returns (base
)))
9613 && !aggregate_value_p (base
, cfun
->decl
))
9615 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
9616 /* ??? Handle PARALLEL in some way. */
9619 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
9620 /* Assume that a single reg-reg move is possible and cheap,
9621 do not account for vector to gp register move cost. */
9626 += record_stmt_cost (cost_vec
, ncopies
, vector_store
,
9627 stmt_info
, 0, vect_epilogue
);
9630 += record_stmt_cost (cost_vec
, ncopies
* nregs
, scalar_load
,
9631 stmt_info
, 0, vect_epilogue
);
9635 if (dump_enabled_p ())
9636 dump_printf_loc (MSG_NOTE
, vect_location
,
9637 "vect_model_store_cost: inside_cost = %d, "
9638 "prologue_cost = %d .\n",
9639 inside_cost
, prologue_cost
);
9645 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9646 VECTOR_CST mask. No checks are made that the target platform supports the
9647 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9648 vect_gen_perm_mask_checked. */
9651 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
9655 poly_uint64 nunits
= sel
.length ();
9656 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
9658 mask_type
= build_vector_type (ssizetype
, nunits
);
9659 return vec_perm_indices_to_tree (mask_type
, sel
);
9662 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9663 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9666 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
9668 machine_mode vmode
= TYPE_MODE (vectype
);
9669 gcc_assert (can_vec_perm_const_p (vmode
, vmode
, sel
));
9670 return vect_gen_perm_mask_any (vectype
, sel
);
9673 /* Given a vector variable X and Y, that was generated for the scalar
9674 STMT_INFO, generate instructions to permute the vector elements of X and Y
9675 using permutation mask MASK_VEC, insert them at *GSI and return the
9676 permuted vector variable. */
9679 permute_vec_elements (vec_info
*vinfo
,
9680 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
9681 gimple_stmt_iterator
*gsi
)
9683 tree vectype
= TREE_TYPE (x
);
9684 tree perm_dest
, data_ref
;
9687 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
9688 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
9689 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9691 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
9692 data_ref
= make_ssa_name (perm_dest
);
9694 /* Generate the permute statement. */
9695 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
9696 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9702 inserting them on the loops preheader edge. Returns true if we
9703 were successful in doing so (and thus STMT_INFO can be moved then),
9704 otherwise returns false. HOIST_P indicates if we want to hoist the
9705 definitions of all SSA uses, it would be false when we are costing. */
/* NOTE(review): this block is a garbled extraction — several original
   source lines (including the early-return results of the recursion check
   and the HOIST_P bail-out between the two operand walks) are missing
   here.  The comments below describe only what the visible code shows;
   verify against the full source before relying on them.  */
9708 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
, bool hoist_p
)
/* First pass: inspect every SSA use of STMT_INFO's statement and check
   whether its definition lies inside LOOP.  */
9714 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9716 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
)
;
9717 if (!gimple_nop_p (def_stmt
)
9718 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9720 /* Make sure we don't need to recurse. While we could do
9721 so in simple cases when there are more complex use webs
9722 we don't have an easy way to preserve stmt order to fulfil
9723 dependencies within them. */
/* A PHI definition cannot be hoisted; presumably this bails out —
   the result of this test is in the missing lines.  TODO confirm.  */
9726 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Check the uses of the in-loop definition itself: if any of them is
   also defined inside LOOP we would have to recurse, which (per the
   comment above) is not supported.  */
9728 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
9730 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
)
;
9731 if (!gimple_nop_p (def_stmt2
)
9732 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second pass: actually move each in-loop definition, removing it from
   its current position and re-inserting it on LOOP's preheader edge.
   (The HOIST_P guard documented in the header comment sits in the
   missing lines before this walk — TODO confirm.)  */
9745 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9747 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
)
;
9748 if (!gimple_nop_p (def_stmt
)
9749 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9751 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
)
;
9752 gsi_remove (&gsi
, false);
9753 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
)
;
9760 /* vectorizable_load.
9762 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9763 that can be vectorized.
9764 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9765 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9766 Return true if STMT_INFO is vectorizable in this way. */
9769 vectorizable_load (vec_info
*vinfo
,
9770 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9771 gimple
**vec_stmt
, slp_tree slp_node
,
9772 stmt_vector_for_cost
*cost_vec
)
9775 tree vec_dest
= NULL
;
9776 tree data_ref
= NULL
;
9777 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9778 class loop
*loop
= NULL
;
9779 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9780 bool nested_in_vect_loop
= false;
9782 /* Avoid false positive uninitialized warning, see PR110652. */
9783 tree new_temp
= NULL_TREE
;
9786 tree dataref_ptr
= NULL_TREE
;
9787 tree dataref_offset
= NULL_TREE
;
9788 gimple
*ptr_incr
= NULL
;
9791 unsigned int group_size
;
9792 poly_uint64 group_gap_adj
;
9793 tree msq
= NULL_TREE
, lsq
;
9794 tree realignment_token
= NULL_TREE
;
9796 vec
<tree
> dr_chain
= vNULL
;
9797 bool grouped_load
= false;
9798 stmt_vec_info first_stmt_info
;
9799 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9800 bool compute_in_loop
= false;
9801 class loop
*at_loop
;
9803 bool slp
= (slp_node
!= NULL
);
9804 bool slp_perm
= false;
9805 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9808 gather_scatter_info gs_info
;
9810 enum vect_def_type mask_dt
= vect_unknown_def_type
;
9812 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9815 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9819 if (!STMT_VINFO_DATA_REF (stmt_info
))
9822 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
9823 int mask_index
= -1;
9824 slp_tree slp_op
= NULL
;
9825 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
9827 scalar_dest
= gimple_assign_lhs (assign
);
9828 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
9831 tree_code code
= gimple_assign_rhs_code (assign
);
9832 if (code
!= ARRAY_REF
9833 && code
!= BIT_FIELD_REF
9834 && code
!= INDIRECT_REF
9835 && code
!= COMPONENT_REF
9836 && code
!= IMAGPART_EXPR
9837 && code
!= REALPART_EXPR
9839 && TREE_CODE_CLASS (code
) != tcc_declaration
)
9844 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9845 if (!call
|| !gimple_call_internal_p (call
))
9848 internal_fn ifn
= gimple_call_internal_fn (call
);
9849 if (!internal_load_fn_p (ifn
))
9852 scalar_dest
= gimple_call_lhs (call
);
9856 mask_index
= internal_fn_mask_index (ifn
);
9857 if (mask_index
>= 0 && slp_node
)
9858 mask_index
= vect_slp_child_index_for_operand
9859 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9861 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
9862 &mask
, &slp_op
, &mask_dt
, &mask_vectype
))
9866 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9867 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9871 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
9872 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
9873 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
9878 /* Multiple types in SLP are handled by creating the appropriate number of
9879 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9884 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9886 gcc_assert (ncopies
>= 1);
9888 /* FORNOW. This restriction should be relaxed. */
9889 if (nested_in_vect_loop
&& ncopies
> 1)
9891 if (dump_enabled_p ())
9892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9893 "multiple types in nested loop.\n");
9897 /* Invalidate assumptions made by dependence analysis when vectorization
9898 on the unrolled body effectively re-orders stmts. */
9900 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9901 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9902 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9904 if (dump_enabled_p ())
9905 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9906 "cannot perform implicit CSE when unrolling "
9907 "with negative dependence distance\n");
9911 elem_type
= TREE_TYPE (vectype
);
9912 mode
= TYPE_MODE (vectype
);
9914 /* FORNOW. In some cases can vectorize even if data-type not supported
9915 (e.g. - data copies). */
9916 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9918 if (dump_enabled_p ())
9919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9920 "Aligned load, but unsupported type.\n");
9924 /* Check if the load is a part of an interleaving chain. */
9925 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9927 grouped_load
= true;
9929 gcc_assert (!nested_in_vect_loop
);
9930 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9932 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9933 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9935 /* Refuse non-SLP vectorization of SLP-only groups. */
9936 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9938 if (dump_enabled_p ())
9939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9940 "cannot vectorize load in non-SLP mode.\n");
9944 /* Invalidate assumptions made by dependence analysis when vectorization
9945 on the unrolled body effectively re-orders stmts. */
9946 if (!PURE_SLP_STMT (stmt_info
)
9947 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9948 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9949 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9951 if (dump_enabled_p ())
9952 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9953 "cannot perform implicit CSE when performing "
9954 "group loads with negative dependence distance\n");
9961 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9967 /* In BB vectorization we may not actually use a loaded vector
9968 accessing elements in excess of DR_GROUP_SIZE. */
9969 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9970 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
9971 unsigned HOST_WIDE_INT nunits
;
9972 unsigned j
, k
, maxk
= 0;
9973 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
9976 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
9977 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
9978 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
9980 if (dump_enabled_p ())
9981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9982 "BB vectorization with gaps at the end of "
9983 "a load is not supported\n");
9990 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
9993 if (dump_enabled_p ())
9994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
9996 "unsupported load permutation\n");
10001 vect_memory_access_type memory_access_type
;
10002 enum dr_alignment_support alignment_support_scheme
;
10004 poly_int64 poffset
;
10005 internal_fn lanes_ifn
;
10006 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
10007 ncopies
, &memory_access_type
, &poffset
,
10008 &alignment_support_scheme
, &misalignment
, &gs_info
,
10014 if (memory_access_type
== VMAT_CONTIGUOUS
)
10016 machine_mode vec_mode
= TYPE_MODE (vectype
);
10017 if (!VECTOR_MODE_P (vec_mode
)
10018 || !can_vec_mask_load_store_p (vec_mode
,
10019 TYPE_MODE (mask_vectype
), true))
10022 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
10023 && memory_access_type
!= VMAT_GATHER_SCATTER
)
10025 if (dump_enabled_p ())
10026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10027 "unsupported access type for masked load.\n");
10030 else if (memory_access_type
== VMAT_GATHER_SCATTER
10031 && gs_info
.ifn
== IFN_LAST
10034 if (dump_enabled_p ())
10035 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10036 "unsupported masked emulated gather.\n");
10041 bool costing_p
= !vec_stmt
;
10043 if (costing_p
) /* transformation not required. */
10047 && !vect_maybe_update_slp_op_vectype (slp_op
,
10050 if (dump_enabled_p ())
10051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10052 "incompatible vector types for invariants\n");
10057 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
10060 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10061 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
10062 VLS_LOAD
, group_size
,
10063 memory_access_type
, &gs_info
,
10066 if (dump_enabled_p ()
10067 && memory_access_type
!= VMAT_ELEMENTWISE
10068 && memory_access_type
!= VMAT_GATHER_SCATTER
10069 && alignment_support_scheme
!= dr_aligned
)
10070 dump_printf_loc (MSG_NOTE
, vect_location
,
10071 "Vectorizing an unaligned access.\n");
10073 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10074 vinfo
->any_known_not_updated_vssa
= true;
10076 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
10080 gcc_assert (memory_access_type
10081 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
10083 if (dump_enabled_p () && !costing_p
)
10084 dump_printf_loc (MSG_NOTE
, vect_location
,
10085 "transform load. ncopies = %d\n", ncopies
);
10089 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
10090 ensure_base_align (dr_info
);
10092 if (memory_access_type
== VMAT_INVARIANT
)
10094 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
10095 /* If we have versioned for aliasing or the loop doesn't
10096 have any data dependencies that would preclude this,
10097 then we are sure this is a loop invariant load and
10098 thus we can insert it on the preheader edge.
10099 TODO: hoist_defs_of_uses should ideally be computed
10100 once at analysis time, remembered and used in the
10102 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
10103 && !nested_in_vect_loop
10104 && hoist_defs_of_uses (stmt_info
, loop
, !costing_p
));
10107 enum vect_cost_model_location cost_loc
10108 = hoist_p
? vect_prologue
: vect_body
;
10109 unsigned int cost
= record_stmt_cost (cost_vec
, 1, scalar_load
,
10110 stmt_info
, 0, cost_loc
);
10111 cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
, 0,
10113 unsigned int prologue_cost
= hoist_p
? cost
: 0;
10114 unsigned int inside_cost
= hoist_p
? 0 : cost
;
10115 if (dump_enabled_p ())
10116 dump_printf_loc (MSG_NOTE
, vect_location
,
10117 "vect_model_load_cost: inside_cost = %d, "
10118 "prologue_cost = %d .\n",
10119 inside_cost
, prologue_cost
);
10124 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
10125 if (dump_enabled_p ())
10126 dump_printf_loc (MSG_NOTE
, vect_location
,
10127 "hoisting out of the vectorized loop: %G",
10129 scalar_dest
= copy_ssa_name (scalar_dest
);
10130 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
10131 edge pe
= loop_preheader_edge (loop
);
10132 gphi
*vphi
= get_virtual_phi (loop
->header
);
10135 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
10137 vuse
= gimple_vuse (gsi_stmt (*gsi
));
10138 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
10139 gimple_set_vuse (new_stmt
, vuse
);
10140 gsi_insert_on_edge_immediate (pe
, new_stmt
);
10142 /* These copies are all equivalent. */
10144 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10148 gimple_stmt_iterator gsi2
= *gsi
;
10150 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10153 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10155 for (j
= 0; j
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
); ++j
)
10156 slp_node
->push_vec_def (new_stmt
);
10159 for (j
= 0; j
< ncopies
; ++j
)
10160 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10161 *vec_stmt
= new_stmt
;
10166 if (memory_access_type
== VMAT_ELEMENTWISE
10167 || memory_access_type
== VMAT_STRIDED_SLP
)
10169 gimple_stmt_iterator incr_gsi
;
10174 vec
<constructor_elt
, va_gc
> *v
= NULL
;
10175 tree stride_base
, stride_step
, alias_off
;
10176 /* Checked by get_load_store_type. */
10177 unsigned int const_nunits
= nunits
.to_constant ();
10178 unsigned HOST_WIDE_INT cst_offset
= 0;
10180 unsigned int inside_cost
= 0;
10182 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
10183 gcc_assert (!nested_in_vect_loop
);
10187 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10188 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10192 first_stmt_info
= stmt_info
;
10193 first_dr_info
= dr_info
;
10196 if (slp
&& grouped_load
)
10198 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10199 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10205 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
10206 * vect_get_place_in_interleaving_chain (stmt_info
,
10209 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
10214 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
10215 stride_base
= fold_build_pointer_plus (
10216 DR_BASE_ADDRESS (first_dr_info
->dr
),
10217 size_binop (PLUS_EXPR
, convert_to_ptrofftype (dr_offset
),
10218 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
10219 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
10221 /* For a load with loop-invariant (but other than power-of-2)
10222 stride (i.e. not a grouped access) like so:
10224 for (i = 0; i < n; i += stride)
10227 we generate a new induction variable and new accesses to
10228 form a new vector (or vectors, depending on ncopies):
10230 for (j = 0; ; j += VF*stride)
10232 tmp2 = array[j + stride];
10234 vectemp = {tmp1, tmp2, ...}
10237 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
10238 build_int_cst (TREE_TYPE (stride_step
), vf
));
10240 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
10242 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
10243 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
10244 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
10245 loop
, &incr_gsi
, insert_after
,
10248 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
10251 running_off
= offvar
;
10252 alias_off
= build_int_cst (ref_type
, 0);
10253 int nloads
= const_nunits
;
10255 tree ltype
= TREE_TYPE (vectype
);
10256 tree lvectype
= vectype
;
10257 auto_vec
<tree
> dr_chain
;
10258 if (memory_access_type
== VMAT_STRIDED_SLP
)
10260 if (group_size
< const_nunits
)
10262 /* First check if vec_init optab supports construction from vector
10263 elts directly. Otherwise avoid emitting a constructor of
10264 vector elements by performing the loads using an integer type
10265 of the same size, constructing a vector of those and then
10266 re-interpreting it as the original vector type. This avoids a
10267 huge runtime penalty due to the general inability to perform
10268 store forwarding from smaller stores to a larger load. */
10271 = vector_vector_composition_type (vectype
,
10272 const_nunits
/ group_size
,
10274 if (vtype
!= NULL_TREE
)
10276 nloads
= const_nunits
/ group_size
;
10285 lnel
= const_nunits
;
10288 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10290 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10291 else if (nloads
== 1)
10296 /* For SLP permutation support we need to load the whole group,
10297 not only the number of vector stmts the permutation result
10301 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10303 unsigned int const_vf
= vf
.to_constant ();
10304 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10305 dr_chain
.create (ncopies
);
10308 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10310 unsigned int group_el
= 0;
10311 unsigned HOST_WIDE_INT
10312 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10313 unsigned int n_groups
= 0;
10314 /* For costing some adjacent vector loads, we'd like to cost with
10315 the total number of them once instead of cost each one by one. */
10316 unsigned int n_adjacent_loads
= 0;
10317 for (j
= 0; j
< ncopies
; j
++)
10319 if (nloads
> 1 && !costing_p
)
10320 vec_alloc (v
, nloads
);
10321 gimple
*new_stmt
= NULL
;
10322 for (i
= 0; i
< nloads
; i
++)
10326 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10327 avoid ICE, see PR110776. */
10328 if (VECTOR_TYPE_P (ltype
)
10329 && memory_access_type
!= VMAT_ELEMENTWISE
)
10330 n_adjacent_loads
++;
10332 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10333 stmt_info
, 0, vect_body
);
10336 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10337 group_el
* elsz
+ cst_offset
);
10338 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10339 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10340 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10341 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10343 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10344 gimple_assign_lhs (new_stmt
));
10348 || group_el
== group_size
)
10351 /* When doing SLP make sure to not load elements from
10352 the next vector iteration, those will not be accessed
10353 so just use the last element again. See PR107451. */
10354 if (!slp
|| known_lt (n_groups
, vf
))
10356 tree newoff
= copy_ssa_name (running_off
);
10358 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10359 running_off
, stride_step
);
10360 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10361 running_off
= newoff
;
10370 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10371 stmt_info
, 0, vect_body
);
10374 tree vec_inv
= build_constructor (lvectype
, v
);
10375 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10377 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10378 if (lvectype
!= vectype
)
10381 = gimple_build_assign (make_ssa_name (vectype
),
10383 build1 (VIEW_CONVERT_EXPR
,
10384 vectype
, new_temp
));
10385 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10396 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10398 slp_node
->push_vec_def (new_stmt
);
10403 *vec_stmt
= new_stmt
;
10404 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10414 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10415 true, &n_perms
, &n_loads
);
10416 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10417 first_stmt_info
, 0, vect_body
);
10420 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10426 if (n_adjacent_loads
> 0)
10427 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10428 alignment_support_scheme
, misalignment
, false,
10429 &inside_cost
, nullptr, cost_vec
, cost_vec
,
10431 if (dump_enabled_p ())
10432 dump_printf_loc (MSG_NOTE
, vect_location
,
10433 "vect_model_load_cost: inside_cost = %u, "
10434 "prologue_cost = 0 .\n",
10441 if (memory_access_type
== VMAT_GATHER_SCATTER
10442 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10443 grouped_load
= false;
10446 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10450 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10451 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10455 first_stmt_info
= stmt_info
;
10458 /* For SLP vectorization we directly vectorize a subchain
10459 without permutation. */
10460 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10461 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10462 /* For BB vectorization always use the first stmt to base
10463 the data ref pointer on. */
10465 first_stmt_info_for_drptr
10466 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10468 /* Check if the chain of loads is already vectorized. */
10469 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
10470 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10471 ??? But we can only do so if there is exactly one
10472 as we have no way to get at the rest. Leave the CSE
10474 ??? With the group load eventually participating
10475 in multiple different permutations (having multiple
10476 slp nodes which refer to the same group) the CSE
10477 is even wrong code. See PR56270. */
10480 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10483 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10486 /* VEC_NUM is the number of vect stmts to be created for this group. */
10489 grouped_load
= false;
10490 /* If an SLP permutation is from N elements to N elements,
10491 and if one vector holds a whole number of N, we can load
10492 the inputs to the permutation in the same way as an
10493 unpermuted sequence. In other cases we need to load the
10494 whole group, not only the number of vector stmts the
10495 permutation result fits in. */
10496 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10498 && (group_size
!= scalar_lanes
10499 || !multiple_p (nunits
, group_size
)))
10501 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10502 variable VF; see vect_transform_slp_perm_load. */
10503 unsigned int const_vf
= vf
.to_constant ();
10504 unsigned int const_nunits
= nunits
.to_constant ();
10505 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10506 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10510 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10512 = group_size
- scalar_lanes
;
10516 vec_num
= group_size
;
10518 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10522 first_stmt_info
= stmt_info
;
10523 first_dr_info
= dr_info
;
10524 group_size
= vec_num
= 1;
10526 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10528 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10531 gcc_assert (alignment_support_scheme
);
10532 vec_loop_masks
*loop_masks
10533 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10534 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10536 vec_loop_lens
*loop_lens
10537 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10538 ? &LOOP_VINFO_LENS (loop_vinfo
)
10541 /* Shouldn't go with length-based approach if fully masked. */
10542 gcc_assert (!loop_lens
|| !loop_masks
);
10544 /* Targets with store-lane instructions must not require explicit
10545 realignment. vect_supportable_dr_alignment always returns either
10546 dr_aligned or dr_unaligned_supported for masked operations. */
10547 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
10550 || alignment_support_scheme
== dr_aligned
10551 || alignment_support_scheme
== dr_unaligned_supported
);
10553 /* In case the vectorization factor (VF) is bigger than the number
10554 of elements that we can fit in a vectype (nunits), we have to generate
10555 more than one vector stmt - i.e - we need to "unroll" the
10556 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10557 from one copy of the vector stmt to the next, in the field
10558 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10559 stages to find the correct vector defs to be used when vectorizing
10560 stmts that use the defs of the current stmt. The example below
10561 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10562 need to create 4 vectorized stmts):
10564 before vectorization:
10565 RELATED_STMT VEC_STMT
10569 step 1: vectorize stmt S1:
10570 We first create the vector stmt VS1_0, and, as usual, record a
10571 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10572 Next, we create the vector stmt VS1_1, and record a pointer to
10573 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10574 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10575 stmts and pointers:
10576 RELATED_STMT VEC_STMT
10577 VS1_0: vx0 = memref0 VS1_1 -
10578 VS1_1: vx1 = memref1 VS1_2 -
10579 VS1_2: vx2 = memref2 VS1_3 -
10580 VS1_3: vx3 = memref3 - -
10581 S1: x = load - VS1_0
10585 /* In case of interleaving (non-unit grouped access):
10592 Vectorized loads are created in the order of memory accesses
10593 starting from the access of the first stmt of the chain:
10596 VS2: vx1 = &base + vec_size*1
10597 VS3: vx3 = &base + vec_size*2
10598 VS4: vx4 = &base + vec_size*3
10600 Then permutation statements are generated:
10602 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10603 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10606 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10607 (the order of the data-refs in the output of vect_permute_load_chain
10608 corresponds to the order of scalar stmts in the interleaving chain - see
10609 the documentation of vect_permute_load_chain()).
10610 The generation of permutation stmts and recording them in
10611 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10613 In case of both multiple types and interleaving, the vector loads and
10614 permutation stmts above are created for every copy. The result vector
10615 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10616 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10618 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10619 on a target that supports unaligned accesses (dr_unaligned_supported)
10620 we generate the following code:
10624 p = p + indx * vectype_size;
10629 Otherwise, the data reference is potentially unaligned on a target that
10630 does not support unaligned accesses (dr_explicit_realign_optimized) -
10631 then generate the following code, in which the data in each iteration is
10632 obtained by two vector loads, one from the previous iteration, and one
10633 from the current iteration:
10635 msq_init = *(floor(p1))
10636 p2 = initial_addr + VS - 1;
10637 realignment_token = call target_builtin;
10640 p2 = p2 + indx * vectype_size
10642 vec_dest = realign_load (msq, lsq, realignment_token)
10647 /* If the misalignment remains the same throughout the execution of the
10648 loop, we can create the init_addr and permutation mask at the loop
10649 preheader. Otherwise, it needs to be created inside the loop.
10650 This can only occur when vectorizing memory accesses in the inner-loop
10651 nested within an outer-loop that is being vectorized. */
10653 if (nested_in_vect_loop
10654 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10655 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10657 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10658 compute_in_loop
= true;
10661 bool diff_first_stmt_info
10662 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10664 tree offset
= NULL_TREE
;
10665 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10666 || alignment_support_scheme
== dr_explicit_realign
)
10667 && !compute_in_loop
)
10669 /* If we have different first_stmt_info, we can't set up realignment
10670 here, since we can't guarantee first_stmt_info DR has been
10671 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10672 distance from first_stmt_info DR instead as below. */
10675 if (!diff_first_stmt_info
)
10676 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10677 &realignment_token
,
10678 alignment_support_scheme
, NULL_TREE
,
10680 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10682 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10683 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10685 gcc_assert (!first_stmt_info_for_drptr
);
10692 if (!known_eq (poffset
, 0))
10694 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10695 : size_int (poffset
));
10698 tree vec_offset
= NULL_TREE
;
10699 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10701 aggr_type
= NULL_TREE
;
10704 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10706 aggr_type
= elem_type
;
10708 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10709 &bump
, &vec_offset
, loop_lens
);
10713 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10714 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10716 aggr_type
= vectype
;
10717 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10718 memory_access_type
, loop_lens
);
10721 auto_vec
<tree
> vec_offsets
;
10722 auto_vec
<tree
> vec_masks
;
10723 if (mask
&& !costing_p
)
10726 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10729 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10730 &vec_masks
, mask_vectype
);
10733 tree vec_mask
= NULL_TREE
;
10734 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10736 gcc_assert (alignment_support_scheme
== dr_aligned
10737 || alignment_support_scheme
== dr_unaligned_supported
);
10738 gcc_assert (grouped_load
&& !slp
);
10740 unsigned int inside_cost
= 0, prologue_cost
= 0;
10741 /* For costing some adjacent vector loads, we'd like to cost with
10742 the total number of them once instead of cost each one by one. */
10743 unsigned int n_adjacent_loads
= 0;
10744 for (j
= 0; j
< ncopies
; j
++)
10748 /* An IFN_LOAD_LANES will load all its vector results,
10749 regardless of which ones we actually need. Account
10750 for the cost of unused results. */
10751 if (first_stmt_info
== stmt_info
)
10753 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10754 stmt_vec_info next_stmt_info
= first_stmt_info
;
10758 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10760 while (next_stmt_info
);
10763 if (dump_enabled_p ())
10764 dump_printf_loc (MSG_NOTE
, vect_location
,
10765 "vect_model_load_cost: %d "
10766 "unused vectors.\n",
10768 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10769 alignment_support_scheme
,
10770 misalignment
, false, &inside_cost
,
10771 &prologue_cost
, cost_vec
, cost_vec
,
10775 n_adjacent_loads
++;
10779 /* 1. Create the vector or array pointer update chain. */
10782 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10783 at_loop
, offset
, &dummy
, gsi
,
10784 &ptr_incr
, false, bump
);
10787 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10788 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10792 vec_mask
= vec_masks
[j
];
10794 tree vec_array
= create_vector_array (vectype
, vec_num
);
10796 tree final_mask
= NULL_TREE
;
10797 tree final_len
= NULL_TREE
;
10798 tree bias
= NULL_TREE
;
10800 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10801 ncopies
, vectype
, j
);
10803 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10806 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10809 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10810 ncopies
, vectype
, j
, 1);
10812 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10813 signed char biasval
10814 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10815 bias
= build_int_cst (intQI_type_node
, biasval
);
10818 mask_vectype
= truth_type_for (vectype
);
10819 final_mask
= build_minus_one_cst (mask_vectype
);
10824 if (final_len
&& final_mask
)
10827 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10828 VEC_MASK, LEN, BIAS). */
10829 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10830 tree alias_ptr
= build_int_cst (ref_type
, align
);
10831 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10832 dataref_ptr
, alias_ptr
,
10833 final_mask
, final_len
, bias
);
10835 else if (final_mask
)
10838 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10840 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10841 tree alias_ptr
= build_int_cst (ref_type
, align
);
10842 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10843 dataref_ptr
, alias_ptr
,
10849 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10850 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10851 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10853 gimple_call_set_lhs (call
, vec_array
);
10854 gimple_call_set_nothrow (call
, true);
10855 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10857 dr_chain
.create (vec_num
);
10858 /* Extract each vector into an SSA_NAME. */
10859 for (i
= 0; i
< vec_num
; i
++)
10861 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10863 dr_chain
.quick_push (new_temp
);
10866 /* Record the mapping between SSA_NAMEs and statements. */
10867 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10869 /* Record that VEC_ARRAY is now dead. */
10870 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10872 dr_chain
.release ();
10874 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10879 if (n_adjacent_loads
> 0)
10880 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10881 alignment_support_scheme
, misalignment
, false,
10882 &inside_cost
, &prologue_cost
, cost_vec
,
10884 if (dump_enabled_p ())
10885 dump_printf_loc (MSG_NOTE
, vect_location
,
10886 "vect_model_load_cost: inside_cost = %u, "
10887 "prologue_cost = %u .\n",
10888 inside_cost
, prologue_cost
);
10894 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10896 gcc_assert (alignment_support_scheme
== dr_aligned
10897 || alignment_support_scheme
== dr_unaligned_supported
);
10898 gcc_assert (!grouped_load
&& !slp_perm
);
10900 unsigned int inside_cost
= 0, prologue_cost
= 0;
10901 for (j
= 0; j
< ncopies
; j
++)
10903 /* 1. Create the vector or array pointer update chain. */
10904 if (j
== 0 && !costing_p
)
10906 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10907 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10908 slp_node
, &gs_info
, &dataref_ptr
,
10912 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10913 at_loop
, offset
, &dummy
, gsi
,
10914 &ptr_incr
, false, bump
);
10916 else if (!costing_p
)
10918 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10919 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10920 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10921 gsi
, stmt_info
, bump
);
10924 gimple
*new_stmt
= NULL
;
10925 for (i
= 0; i
< vec_num
; i
++)
10927 tree final_mask
= NULL_TREE
;
10928 tree final_len
= NULL_TREE
;
10929 tree bias
= NULL_TREE
;
10933 vec_mask
= vec_masks
[vec_num
* j
+ i
];
10936 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10937 vec_num
* ncopies
, vectype
,
10940 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10941 final_mask
, vec_mask
, gsi
);
10943 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10944 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10945 gsi
, stmt_info
, bump
);
10948 /* 2. Create the vector-load in the loop. */
10949 unsigned HOST_WIDE_INT align
;
10950 if (gs_info
.ifn
!= IFN_LAST
)
10954 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
10956 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
10957 stmt_info
, 0, vect_body
);
10960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10961 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
10962 tree zero
= build_zero_cst (vectype
);
10963 tree scale
= size_int (gs_info
.scale
);
10965 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
10969 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10970 vec_num
* ncopies
, vectype
,
10971 vec_num
* j
+ i
, 1);
10974 = build_int_cst (sizetype
,
10975 TYPE_VECTOR_SUBPARTS (vectype
));
10976 signed char biasval
10977 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10978 bias
= build_int_cst (intQI_type_node
, biasval
);
10981 mask_vectype
= truth_type_for (vectype
);
10982 final_mask
= build_minus_one_cst (mask_vectype
);
10987 if (final_len
&& final_mask
)
10989 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
10990 dataref_ptr
, vec_offset
,
10991 scale
, zero
, final_mask
,
10993 else if (final_mask
)
10994 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
10995 dataref_ptr
, vec_offset
,
10996 scale
, zero
, final_mask
);
10998 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
10999 dataref_ptr
, vec_offset
,
11001 gimple_call_set_nothrow (call
, true);
11003 data_ref
= NULL_TREE
;
11005 else if (gs_info
.decl
)
11007 /* The builtin decls path for gather is legacy, x86 only. */
11008 gcc_assert (!final_len
&& nunits
.is_constant ());
11011 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11013 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11014 stmt_info
, 0, vect_body
);
11017 poly_uint64 offset_nunits
11018 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
11019 if (known_eq (nunits
, offset_nunits
))
11021 new_stmt
= vect_build_one_gather_load_call
11022 (vinfo
, stmt_info
, gsi
, &gs_info
,
11023 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
11025 data_ref
= NULL_TREE
;
11027 else if (known_eq (nunits
, offset_nunits
* 2))
11029 /* We have a offset vector with half the number of
11030 lanes but the builtins will produce full vectype
11031 data with just the lower lanes filled. */
11032 new_stmt
= vect_build_one_gather_load_call
11033 (vinfo
, stmt_info
, gsi
, &gs_info
,
11034 dataref_ptr
, vec_offsets
[2 * vec_num
* j
+ 2 * i
],
11036 tree low
= make_ssa_name (vectype
);
11037 gimple_set_lhs (new_stmt
, low
);
11038 vect_finish_stmt_generation (vinfo
, stmt_info
,
11041 /* now put upper half of final_mask in final_mask low. */
11043 && !SCALAR_INT_MODE_P
11044 (TYPE_MODE (TREE_TYPE (final_mask
))))
11046 int count
= nunits
.to_constant ();
11047 vec_perm_builder
sel (count
, count
, 1);
11048 sel
.quick_grow (count
);
11049 for (int i
= 0; i
< count
; ++i
)
11050 sel
[i
] = i
| (count
/ 2);
11051 vec_perm_indices
indices (sel
, 2, count
);
11052 tree perm_mask
= vect_gen_perm_mask_checked
11053 (TREE_TYPE (final_mask
), indices
);
11054 new_stmt
= gimple_build_assign (NULL_TREE
,
11059 final_mask
= make_ssa_name (TREE_TYPE (final_mask
));
11060 gimple_set_lhs (new_stmt
, final_mask
);
11061 vect_finish_stmt_generation (vinfo
, stmt_info
,
11064 else if (final_mask
)
11066 new_stmt
= gimple_build_assign (NULL_TREE
,
11067 VEC_UNPACK_HI_EXPR
,
11069 final_mask
= make_ssa_name
11070 (truth_type_for (gs_info
.offset_vectype
));
11071 gimple_set_lhs (new_stmt
, final_mask
);
11072 vect_finish_stmt_generation (vinfo
, stmt_info
,
11076 new_stmt
= vect_build_one_gather_load_call
11077 (vinfo
, stmt_info
, gsi
, &gs_info
,
11079 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
11081 tree high
= make_ssa_name (vectype
);
11082 gimple_set_lhs (new_stmt
, high
);
11083 vect_finish_stmt_generation (vinfo
, stmt_info
,
11086 /* compose low + high. */
11087 int count
= nunits
.to_constant ();
11088 vec_perm_builder
sel (count
, count
, 1);
11089 sel
.quick_grow (count
);
11090 for (int i
= 0; i
< count
; ++i
)
11091 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
11092 vec_perm_indices
indices (sel
, 2, count
);
11094 = vect_gen_perm_mask_checked (vectype
, indices
);
11095 new_stmt
= gimple_build_assign (NULL_TREE
,
11097 low
, high
, perm_mask
);
11098 data_ref
= NULL_TREE
;
11100 else if (known_eq (nunits
* 2, offset_nunits
))
11102 /* We have a offset vector with double the number of
11103 lanes. Select the low/high part accordingly. */
11104 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
11105 if ((vec_num
* j
+ i
) & 1)
11107 int count
= offset_nunits
.to_constant ();
11108 vec_perm_builder
sel (count
, count
, 1);
11109 sel
.quick_grow (count
);
11110 for (int i
= 0; i
< count
; ++i
)
11111 sel
[i
] = i
| (count
/ 2);
11112 vec_perm_indices
indices (sel
, 2, count
);
11113 tree perm_mask
= vect_gen_perm_mask_checked
11114 (TREE_TYPE (vec_offset
), indices
);
11115 new_stmt
= gimple_build_assign (NULL_TREE
,
11120 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
11121 gimple_set_lhs (new_stmt
, vec_offset
);
11122 vect_finish_stmt_generation (vinfo
, stmt_info
,
11125 new_stmt
= vect_build_one_gather_load_call
11126 (vinfo
, stmt_info
, gsi
, &gs_info
,
11127 dataref_ptr
, vec_offset
, final_mask
);
11128 data_ref
= NULL_TREE
;
11131 gcc_unreachable ();
11135 /* Emulated gather-scatter. */
11136 gcc_assert (!final_mask
);
11137 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
11140 /* For emulated gathers N offset vector element
11141 offset add is consumed by the load). */
11142 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
11143 vec_to_scalar
, stmt_info
,
11145 /* N scalar loads plus gathering them into a
11148 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
11149 stmt_info
, 0, vect_body
);
11151 = record_stmt_cost (cost_vec
, 1, vec_construct
,
11152 stmt_info
, 0, vect_body
);
11155 unsigned HOST_WIDE_INT const_offset_nunits
11156 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
11158 vec
<constructor_elt
, va_gc
> *ctor_elts
;
11159 vec_alloc (ctor_elts
, const_nunits
);
11160 gimple_seq stmts
= NULL
;
11161 /* We support offset vectors with more elements
11162 than the data vector for now. */
11163 unsigned HOST_WIDE_INT factor
11164 = const_offset_nunits
/ const_nunits
;
11165 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
11166 unsigned elt_offset
= (j
% factor
) * const_nunits
;
11167 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
11168 tree scale
= size_int (gs_info
.scale
);
11169 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
11170 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
11171 for (unsigned k
= 0; k
< const_nunits
; ++k
)
11173 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
11174 bitsize_int (k
+ elt_offset
));
11176 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
11177 vec_offset
, TYPE_SIZE (idx_type
), boff
);
11178 idx
= gimple_convert (&stmts
, sizetype
, idx
);
11179 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
11181 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
11182 TREE_TYPE (dataref_ptr
),
11184 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
11185 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
11186 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
11187 build_int_cst (ref_type
, 0));
11188 new_stmt
= gimple_build_assign (elt
, ref
);
11189 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
11190 gimple_seq_add_stmt (&stmts
, new_stmt
);
11191 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
11193 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
11194 new_stmt
= gimple_build_assign (
11195 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
11196 data_ref
= NULL_TREE
;
11199 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11200 /* DATA_REF is null if we've already built the statement. */
11203 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11204 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11206 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11207 gimple_set_lhs (new_stmt
, new_temp
);
11208 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11210 /* Store vector loads in the corresponding SLP_NODE. */
11212 slp_node
->push_vec_def (new_stmt
);
11215 if (!slp
&& !costing_p
)
11216 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11219 if (!slp
&& !costing_p
)
11220 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11222 if (costing_p
&& dump_enabled_p ())
11223 dump_printf_loc (MSG_NOTE
, vect_location
,
11224 "vect_model_load_cost: inside_cost = %u, "
11225 "prologue_cost = %u .\n",
11226 inside_cost
, prologue_cost
);
11230 poly_uint64 group_elt
= 0;
11231 unsigned int inside_cost
= 0, prologue_cost
= 0;
11232 /* For costing some adjacent vector loads, we'd like to cost with
11233 the total number of them once instead of cost each one by one. */
11234 unsigned int n_adjacent_loads
= 0;
11235 for (j
= 0; j
< ncopies
; j
++)
11237 /* 1. Create the vector or array pointer update chain. */
11238 if (j
== 0 && !costing_p
)
11240 bool simd_lane_access_p
11241 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
11242 if (simd_lane_access_p
11243 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
11244 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
11245 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
11246 && integer_zerop (DR_INIT (first_dr_info
->dr
))
11247 && alias_sets_conflict_p (get_alias_set (aggr_type
),
11248 get_alias_set (TREE_TYPE (ref_type
)))
11249 && (alignment_support_scheme
== dr_aligned
11250 || alignment_support_scheme
== dr_unaligned_supported
))
11252 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
11253 dataref_offset
= build_int_cst (ref_type
, 0);
11255 else if (diff_first_stmt_info
)
11258 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
11259 aggr_type
, at_loop
, offset
, &dummy
,
11260 gsi
, &ptr_incr
, simd_lane_access_p
,
11262 /* Adjust the pointer by the difference to first_stmt. */
11263 data_reference_p ptrdr
11264 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
11266 = fold_convert (sizetype
,
11267 size_binop (MINUS_EXPR
,
11268 DR_INIT (first_dr_info
->dr
),
11270 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11272 if (alignment_support_scheme
== dr_explicit_realign
)
11274 msq
= vect_setup_realignment (vinfo
,
11275 first_stmt_info_for_drptr
, gsi
,
11276 &realignment_token
,
11277 alignment_support_scheme
,
11278 dataref_ptr
, &at_loop
);
11279 gcc_assert (!compute_in_loop
);
11284 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
11286 offset
, &dummy
, gsi
, &ptr_incr
,
11287 simd_lane_access_p
, bump
);
11289 else if (!costing_p
)
11291 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
11292 if (dataref_offset
)
11293 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
11296 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11300 if (grouped_load
|| slp_perm
)
11301 dr_chain
.create (vec_num
);
11303 gimple
*new_stmt
= NULL
;
11304 for (i
= 0; i
< vec_num
; i
++)
11306 tree final_mask
= NULL_TREE
;
11307 tree final_len
= NULL_TREE
;
11308 tree bias
= NULL_TREE
;
11312 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11314 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11315 vec_num
* ncopies
, vectype
,
11318 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11319 final_mask
, vec_mask
, gsi
);
11322 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11323 gsi
, stmt_info
, bump
);
11326 /* 2. Create the vector-load in the loop. */
11327 switch (alignment_support_scheme
)
11330 case dr_unaligned_supported
:
11335 unsigned int misalign
;
11336 unsigned HOST_WIDE_INT align
;
11337 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
11338 if (alignment_support_scheme
== dr_aligned
)
11340 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
11343 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
11347 misalign
= misalignment
;
11348 if (dataref_offset
== NULL_TREE
11349 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
11350 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
11352 align
= least_bit_hwi (misalign
| align
);
11354 /* Compute IFN when LOOP_LENS or final_mask valid. */
11355 machine_mode vmode
= TYPE_MODE (vectype
);
11356 machine_mode new_vmode
= vmode
;
11357 internal_fn partial_ifn
= IFN_LAST
;
11360 opt_machine_mode new_ovmode
11361 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
11362 new_vmode
= new_ovmode
.require ();
11364 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
11365 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11366 vec_num
* ncopies
, vectype
,
11367 vec_num
* j
+ i
, factor
);
11369 else if (final_mask
)
11371 if (!can_vec_mask_load_store_p (
11372 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
11374 gcc_unreachable ();
11377 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11381 /* Pass VF value to 'len' argument of
11382 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11383 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11387 /* Pass all ones value to 'mask' argument of
11388 MASK_LEN_LOAD if final_mask is invalid. */
11389 mask_vectype
= truth_type_for (vectype
);
11390 final_mask
= build_minus_one_cst (mask_vectype
);
11395 signed char biasval
11396 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11398 bias
= build_int_cst (intQI_type_node
, biasval
);
11403 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11405 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11406 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
11408 final_mask
, final_len
,
11411 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
11414 gimple_call_set_nothrow (call
, true);
11416 data_ref
= NULL_TREE
;
11418 /* Need conversion if it's wrapped with VnQI. */
11419 if (vmode
!= new_vmode
)
11421 tree new_vtype
= build_vector_type_for_mode (
11422 unsigned_intQI_type_node
, new_vmode
);
11424 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11425 gimple_set_lhs (call
, var
);
11426 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11428 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11429 new_stmt
= gimple_build_assign (vec_dest
,
11430 VIEW_CONVERT_EXPR
, op
);
11433 else if (final_mask
)
11435 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11436 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11439 gimple_call_set_nothrow (call
, true);
11441 data_ref
= NULL_TREE
;
11445 tree ltype
= vectype
;
11446 tree new_vtype
= NULL_TREE
;
11447 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11448 unsigned int vect_align
11449 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11450 unsigned int scalar_dr_size
11451 = vect_get_scalar_dr_size (first_dr_info
);
11452 /* If there's no peeling for gaps but we have a gap
11453 with slp loads then load the lower half of the
11454 vector only. See get_group_load_store_type for
11455 when we apply this optimization. */
11458 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && gap
!= 0
11459 && known_eq (nunits
, (group_size
- gap
) * 2)
11460 && known_eq (nunits
, group_size
)
11461 && gap
>= (vect_align
/ scalar_dr_size
))
11465 = vector_vector_composition_type (vectype
, 2,
11467 if (new_vtype
!= NULL_TREE
)
11468 ltype
= half_vtype
;
11471 = (dataref_offset
? dataref_offset
11472 : build_int_cst (ref_type
, 0));
11473 if (ltype
!= vectype
11474 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11476 unsigned HOST_WIDE_INT gap_offset
11477 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
11478 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
11479 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11482 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11483 if (alignment_support_scheme
== dr_aligned
)
11486 TREE_TYPE (data_ref
)
11487 = build_aligned_type (TREE_TYPE (data_ref
),
11488 align
* BITS_PER_UNIT
);
11489 if (ltype
!= vectype
)
11491 vect_copy_ref_info (data_ref
,
11492 DR_REF (first_dr_info
->dr
));
11493 tree tem
= make_ssa_name (ltype
);
11494 new_stmt
= gimple_build_assign (tem
, data_ref
);
11495 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11498 vec
<constructor_elt
, va_gc
> *v
;
11500 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11502 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11503 build_zero_cst (ltype
));
11504 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11508 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11509 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11510 build_zero_cst (ltype
));
11512 gcc_assert (new_vtype
!= NULL_TREE
);
11513 if (new_vtype
== vectype
)
11514 new_stmt
= gimple_build_assign (
11515 vec_dest
, build_constructor (vectype
, v
));
11518 tree new_vname
= make_ssa_name (new_vtype
);
11519 new_stmt
= gimple_build_assign (
11520 new_vname
, build_constructor (new_vtype
, v
));
11521 vect_finish_stmt_generation (vinfo
, stmt_info
,
11523 new_stmt
= gimple_build_assign (
11525 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
11531 case dr_explicit_realign
:
11537 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11539 if (compute_in_loop
)
11540 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
11541 &realignment_token
,
11542 dr_explicit_realign
,
11543 dataref_ptr
, NULL
);
11545 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11546 ptr
= copy_ssa_name (dataref_ptr
);
11548 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11549 // For explicit realign the target alignment should be
11550 // known at compile time.
11551 unsigned HOST_WIDE_INT align
11552 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11553 new_stmt
= gimple_build_assign (
11554 ptr
, BIT_AND_EXPR
, dataref_ptr
,
11555 build_int_cst (TREE_TYPE (dataref_ptr
),
11556 -(HOST_WIDE_INT
) align
));
11557 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11559 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11560 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11561 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11562 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11563 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11564 gimple_assign_set_lhs (new_stmt
, new_temp
);
11565 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
11566 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11569 bump
= size_binop (MULT_EXPR
, vs
, TYPE_SIZE_UNIT (elem_type
));
11570 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
11571 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
, stmt_info
,
11573 new_stmt
= gimple_build_assign (
11574 NULL_TREE
, BIT_AND_EXPR
, ptr
,
11575 build_int_cst (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
11576 if (TREE_CODE (ptr
) == SSA_NAME
)
11577 ptr
= copy_ssa_name (ptr
, new_stmt
);
11579 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
11580 gimple_assign_set_lhs (new_stmt
, ptr
);
11581 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11583 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11586 case dr_explicit_realign_optimized
:
11590 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11591 new_temp
= copy_ssa_name (dataref_ptr
);
11593 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11594 // We should only be doing this if we know the target
11595 // alignment at compile time.
11596 unsigned HOST_WIDE_INT align
11597 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11598 new_stmt
= gimple_build_assign (
11599 new_temp
, BIT_AND_EXPR
, dataref_ptr
,
11600 build_int_cst (TREE_TYPE (dataref_ptr
),
11601 -(HOST_WIDE_INT
) align
));
11602 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11603 data_ref
= build2 (MEM_REF
, vectype
, new_temp
,
11604 build_int_cst (ref_type
, 0));
11608 gcc_unreachable ();
11611 /* One common place to cost the above vect load for different
11612 alignment support schemes. */
11615 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11616 only need to take care of the first stmt, whose
11617 stmt_info is first_stmt_info, vec_num iterating on it
11618 will cover the cost for the remaining, it's consistent
11619 with transforming. For the prologue cost for realign,
11620 we only need to count it once for the whole group. */
11621 bool first_stmt_info_p
= first_stmt_info
== stmt_info
;
11622 bool add_realign_cost
= first_stmt_info_p
&& i
== 0;
11623 if (memory_access_type
== VMAT_CONTIGUOUS
11624 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11625 || (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
11626 && (!grouped_load
|| first_stmt_info_p
)))
11628 /* Leave realign cases alone to keep them simple. */
11629 if (alignment_support_scheme
== dr_explicit_realign_optimized
11630 || alignment_support_scheme
== dr_explicit_realign
)
11631 vect_get_load_cost (vinfo
, stmt_info
, 1,
11632 alignment_support_scheme
, misalignment
,
11633 add_realign_cost
, &inside_cost
,
11634 &prologue_cost
, cost_vec
, cost_vec
,
11637 n_adjacent_loads
++;
11642 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11643 /* DATA_REF is null if we've already built the statement. */
11646 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11647 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11649 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11650 gimple_set_lhs (new_stmt
, new_temp
);
11651 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11654 /* 3. Handle explicit realignment if necessary/supported.
11656 vec_dest = realign_load (msq, lsq, realignment_token) */
11658 && (alignment_support_scheme
== dr_explicit_realign_optimized
11659 || alignment_support_scheme
== dr_explicit_realign
))
11661 lsq
= gimple_assign_lhs (new_stmt
);
11662 if (!realignment_token
)
11663 realignment_token
= dataref_ptr
;
11664 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11665 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
, msq
,
11666 lsq
, realignment_token
);
11667 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11668 gimple_assign_set_lhs (new_stmt
, new_temp
);
11669 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11671 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
11674 if (i
== vec_num
- 1 && j
== ncopies
- 1)
11675 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
11681 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11684 inside_cost
= record_stmt_cost (cost_vec
, 1, vec_perm
,
11685 stmt_info
, 0, vect_body
);
11688 tree perm_mask
= perm_mask_for_reverse (vectype
);
11689 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
11690 perm_mask
, stmt_info
, gsi
);
11691 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
11695 /* Collect vector loads and later create their permutation in
11696 vect_transform_grouped_load (). */
11697 if (!costing_p
&& (grouped_load
|| slp_perm
))
11698 dr_chain
.quick_push (new_temp
);
11700 /* Store vector loads in the corresponding SLP_NODE. */
11701 if (!costing_p
&& slp
&& !slp_perm
)
11702 slp_node
->push_vec_def (new_stmt
);
11704 /* With SLP permutation we load the gaps as well, without
11705 we need to skip the gaps after we manage to fully load
11706 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11707 group_elt
+= nunits
;
11709 && maybe_ne (group_gap_adj
, 0U)
11711 && known_eq (group_elt
, group_size
- group_gap_adj
))
11713 poly_wide_int bump_val
11714 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11715 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
)
11717 bump_val
= -bump_val
;
11718 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11719 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11724 /* Bump the vector pointer to account for a gap or for excess
11725 elements loaded for a permuted SLP load. */
11727 && maybe_ne (group_gap_adj
, 0U)
11730 poly_wide_int bump_val
11731 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11732 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
11733 bump_val
= -bump_val
;
11734 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11735 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11739 if (slp
&& !slp_perm
)
11745 /* For SLP we know we've seen all possible uses of dr_chain so
11746 direct vect_transform_slp_perm_load to DCE the unused parts.
11747 ??? This is a hack to prevent compile-time issues as seen
11748 in PR101120 and friends. */
11751 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, nullptr, vf
,
11752 true, &n_perms
, nullptr);
11753 inside_cost
= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
11754 stmt_info
, 0, vect_body
);
11758 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
11759 gsi
, vf
, false, &n_perms
,
11768 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11769 /* We assume that the cost of a single load-lanes instruction
11770 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11771 If a grouped access is instead being provided by a
11772 load-and-permute operation, include the cost of the
11774 if (costing_p
&& first_stmt_info
== stmt_info
)
11776 /* Uses an even and odd extract operations or shuffle
11777 operations for each needed permute. */
11778 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11779 int nstmts
= ceil_log2 (group_size
) * group_size
;
11780 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11781 stmt_info
, 0, vect_body
);
11783 if (dump_enabled_p ())
11784 dump_printf_loc (MSG_NOTE
, vect_location
,
11785 "vect_model_load_cost:"
11786 "strided group_size = %d .\n",
11789 else if (!costing_p
)
11791 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11793 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11796 else if (!costing_p
)
11797 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11799 dr_chain
.release ();
11801 if (!slp
&& !costing_p
)
11802 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11806 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11807 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11808 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11809 if (n_adjacent_loads
> 0)
11810 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
11811 alignment_support_scheme
, misalignment
, false,
11812 &inside_cost
, &prologue_cost
, cost_vec
, cost_vec
,
11814 if (dump_enabled_p ())
11815 dump_printf_loc (MSG_NOTE
, vect_location
,
11816 "vect_model_load_cost: inside_cost = %u, "
11817 "prologue_cost = %u .\n",
11818 inside_cost
, prologue_cost
);
11824 /* Function vect_is_simple_cond.
11827 LOOP - the loop that is being vectorized.
11828 COND - Condition that is checked for simple use.
11831 *COMP_VECTYPE - the vector type for the comparison.
11832 *DTS - The def types for the arguments of the comparison
11834 Returns whether a COND can be vectorized. Checks whether
11835 condition operands are supportable using vec_is_simple_use. */
11838 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
11839 slp_tree slp_node
, tree
*comp_vectype
,
11840 enum vect_def_type
*dts
, tree vectype
)
11843 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11847 if (TREE_CODE (cond
) == SSA_NAME
11848 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
11850 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
11851 &slp_op
, &dts
[0], comp_vectype
)
11853 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
11858 if (!COMPARISON_CLASS_P (cond
))
11861 lhs
= TREE_OPERAND (cond
, 0);
11862 rhs
= TREE_OPERAND (cond
, 1);
11864 if (TREE_CODE (lhs
) == SSA_NAME
)
11866 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
11867 &lhs
, &slp_op
, &dts
[0], &vectype1
))
11870 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
11871 || TREE_CODE (lhs
) == FIXED_CST
)
11872 dts
[0] = vect_constant_def
;
11876 if (TREE_CODE (rhs
) == SSA_NAME
)
11878 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
11879 &rhs
, &slp_op
, &dts
[1], &vectype2
))
11882 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
11883 || TREE_CODE (rhs
) == FIXED_CST
)
11884 dts
[1] = vect_constant_def
;
11888 if (vectype1
&& vectype2
11889 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
11890 TYPE_VECTOR_SUBPARTS (vectype2
)))
11893 *comp_vectype
= vectype1
? vectype1
: vectype2
;
11894 /* Invariant comparison. */
11895 if (! *comp_vectype
)
11897 tree scalar_type
= TREE_TYPE (lhs
);
11898 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11899 *comp_vectype
= truth_type_for (vectype
);
11902 /* If we can widen the comparison to match vectype do so. */
11903 if (INTEGRAL_TYPE_P (scalar_type
)
11905 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
11906 TYPE_SIZE (TREE_TYPE (vectype
))))
11907 scalar_type
= build_nonstandard_integer_type
11908 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
11909 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
11917 /* vectorizable_condition.
11919 Check if STMT_INFO is conditional modify expression that can be vectorized.
11920 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11921 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11924 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11926 Return true if STMT_INFO is vectorizable in this way. */
11929 vectorizable_condition (vec_info
*vinfo
,
11930 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11932 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
11934 tree scalar_dest
= NULL_TREE
;
11935 tree vec_dest
= NULL_TREE
;
11936 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
11937 tree then_clause
, else_clause
;
11938 tree comp_vectype
= NULL_TREE
;
11939 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
11940 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
11943 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11944 enum vect_def_type dts
[4]
11945 = {vect_unknown_def_type
, vect_unknown_def_type
,
11946 vect_unknown_def_type
, vect_unknown_def_type
};
11950 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
11952 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11953 vec
<tree
> vec_oprnds0
= vNULL
;
11954 vec
<tree
> vec_oprnds1
= vNULL
;
11955 vec
<tree
> vec_oprnds2
= vNULL
;
11956 vec
<tree
> vec_oprnds3
= vNULL
;
11958 bool masked
= false;
11960 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
11963 /* Is vectorizable conditional operation? */
11964 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
11968 code
= gimple_assign_rhs_code (stmt
);
11969 if (code
!= COND_EXPR
)
11972 stmt_vec_info reduc_info
= NULL
;
11973 int reduc_index
= -1;
11974 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
11976 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
11981 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
11982 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
11983 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
11984 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
11985 || reduc_index
!= -1);
11989 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
11993 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11994 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11999 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
12003 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12007 gcc_assert (ncopies
>= 1);
12008 if (for_reduction
&& ncopies
> 1)
12009 return false; /* FORNOW */
12011 cond_expr
= gimple_assign_rhs1 (stmt
);
12013 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
12014 &comp_vectype
, &dts
[0], vectype
)
12018 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
12019 slp_tree then_slp_node
, else_slp_node
;
12020 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
12021 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
12023 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
12024 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
12027 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
12030 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
12033 masked
= !COMPARISON_CLASS_P (cond_expr
);
12034 vec_cmp_type
= truth_type_for (comp_vectype
);
12036 if (vec_cmp_type
== NULL_TREE
)
12039 cond_code
= TREE_CODE (cond_expr
);
12042 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
12043 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
12046 /* For conditional reductions, the "then" value needs to be the candidate
12047 value calculated by this iteration while the "else" value needs to be
12048 the result carried over from previous iterations. If the COND_EXPR
12049 is the other way around, we need to swap it. */
12050 bool must_invert_cmp_result
= false;
12051 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
12054 must_invert_cmp_result
= true;
12057 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
12058 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
12059 if (new_code
== ERROR_MARK
)
12060 must_invert_cmp_result
= true;
12063 cond_code
= new_code
;
12064 /* Make sure we don't accidentally use the old condition. */
12065 cond_expr
= NULL_TREE
;
12068 std::swap (then_clause
, else_clause
);
12071 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
12073 /* Boolean values may have another representation in vectors
12074 and therefore we prefer bit operations over comparison for
12075 them (which also works for scalar masks). We store opcodes
12076 to use in bitop1 and bitop2. Statement is vectorized as
12077 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12078 depending on bitop1 and bitop2 arity. */
12082 bitop1
= BIT_NOT_EXPR
;
12083 bitop2
= BIT_AND_EXPR
;
12086 bitop1
= BIT_NOT_EXPR
;
12087 bitop2
= BIT_IOR_EXPR
;
12090 bitop1
= BIT_NOT_EXPR
;
12091 bitop2
= BIT_AND_EXPR
;
12092 std::swap (cond_expr0
, cond_expr1
);
12095 bitop1
= BIT_NOT_EXPR
;
12096 bitop2
= BIT_IOR_EXPR
;
12097 std::swap (cond_expr0
, cond_expr1
);
12100 bitop1
= BIT_XOR_EXPR
;
12103 bitop1
= BIT_XOR_EXPR
;
12104 bitop2
= BIT_NOT_EXPR
;
12109 cond_code
= SSA_NAME
;
12112 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
12113 && reduction_type
== EXTRACT_LAST_REDUCTION
12114 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
12116 if (dump_enabled_p ())
12117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12118 "reduction comparison operation not supported.\n");
12124 if (bitop1
!= NOP_EXPR
)
12126 machine_mode mode
= TYPE_MODE (comp_vectype
);
12129 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
12130 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12133 if (bitop2
!= NOP_EXPR
)
12135 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
12137 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12142 vect_cost_for_stmt kind
= vector_stmt
;
12143 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12144 /* Count one reduction-like operation per vector. */
12145 kind
= vec_to_scalar
;
12146 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
12148 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
12150 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
12155 && (!vect_maybe_update_slp_op_vectype
12156 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
12158 && !vect_maybe_update_slp_op_vectype
12159 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
12160 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
12161 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
12163 if (dump_enabled_p ())
12164 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12165 "incompatible vector types for invariants\n");
12169 if (loop_vinfo
&& for_reduction
12170 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12172 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12174 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12175 vectype
, OPTIMIZE_FOR_SPEED
))
12176 vect_record_loop_len (loop_vinfo
,
12177 &LOOP_VINFO_LENS (loop_vinfo
),
12178 ncopies
* vec_num
, vectype
, 1);
12180 vect_record_loop_mask (loop_vinfo
,
12181 &LOOP_VINFO_MASKS (loop_vinfo
),
12182 ncopies
* vec_num
, vectype
, NULL
);
12184 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12185 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
12187 if (dump_enabled_p ())
12188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12189 "conditional reduction prevents the use"
12190 " of partial vectors.\n");
12191 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
12195 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
12196 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
12204 scalar_dest
= gimple_assign_lhs (stmt
);
12205 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12206 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
12208 bool swap_cond_operands
= false;
12210 /* See whether another part of the vectorized code applies a loop
12211 mask to the condition, or to its inverse. */
12213 vec_loop_masks
*masks
= NULL
;
12214 vec_loop_lens
*lens
= NULL
;
12215 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
12217 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12218 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12220 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
12222 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12223 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12226 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
12227 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12228 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12231 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
12232 tree_code orig_code
= cond
.code
;
12233 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
12234 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12236 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12237 cond_code
= cond
.code
;
12238 swap_cond_operands
= true;
12242 /* Try the inverse of the current mask. We check if the
12243 inverse mask is live and if so we generate a negate of
12244 the current mask such that we still honor NaNs. */
12245 cond
.inverted_p
= true;
12246 cond
.code
= orig_code
;
12247 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12249 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12250 cond_code
= cond
.code
;
12251 swap_cond_operands
= true;
12252 must_invert_cmp_result
= true;
12259 /* Handle cond expr. */
12261 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12262 cond_expr
, &vec_oprnds0
, comp_vectype
,
12263 then_clause
, &vec_oprnds2
, vectype
,
12264 reduction_type
!= EXTRACT_LAST_REDUCTION
12265 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
12267 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12268 cond_expr0
, &vec_oprnds0
, comp_vectype
,
12269 cond_expr1
, &vec_oprnds1
, comp_vectype
,
12270 then_clause
, &vec_oprnds2
, vectype
,
12271 reduction_type
!= EXTRACT_LAST_REDUCTION
12272 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
12274 /* Arguments are ready. Create the new vector stmt. */
12275 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
12277 vec_then_clause
= vec_oprnds2
[i
];
12278 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12279 vec_else_clause
= vec_oprnds3
[i
];
12281 if (swap_cond_operands
)
12282 std::swap (vec_then_clause
, vec_else_clause
);
12285 vec_compare
= vec_cond_lhs
;
12288 vec_cond_rhs
= vec_oprnds1
[i
];
12289 if (bitop1
== NOP_EXPR
)
12291 gimple_seq stmts
= NULL
;
12292 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
12293 vec_cond_lhs
, vec_cond_rhs
);
12294 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
12298 new_temp
= make_ssa_name (vec_cmp_type
);
12300 if (bitop1
== BIT_NOT_EXPR
)
12301 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
12305 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
12307 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12308 if (bitop2
== NOP_EXPR
)
12309 vec_compare
= new_temp
;
12310 else if (bitop2
== BIT_NOT_EXPR
12311 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
12313 /* Instead of doing ~x ? y : z do x ? z : y. */
12314 vec_compare
= new_temp
;
12315 std::swap (vec_then_clause
, vec_else_clause
);
12319 vec_compare
= make_ssa_name (vec_cmp_type
);
12320 if (bitop2
== BIT_NOT_EXPR
)
12322 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
12325 = gimple_build_assign (vec_compare
, bitop2
,
12326 vec_cond_lhs
, new_temp
);
12327 vect_finish_stmt_generation (vinfo
, stmt_info
,
12333 /* If we decided to apply a loop mask to the result of the vector
12334 comparison, AND the comparison with the mask now. Later passes
12335 should then be able to reuse the AND results between mulitple
12339 for (int i = 0; i < 100; ++i)
12340 x[i] = y[i] ? z[i] : 10;
12342 results in following optimized GIMPLE:
12344 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12345 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12346 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12347 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12348 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12349 vect_iftmp.11_47, { 10, ... }>;
12351 instead of using a masked and unmasked forms of
12352 vec != { 0, ... } (masked in the MASK_LOAD,
12353 unmasked in the VEC_COND_EXPR). */
12355 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12356 in cases where that's necessary. */
12358 tree len
= NULL_TREE
, bias
= NULL_TREE
;
12359 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
12361 if (!is_gimple_val (vec_compare
))
12363 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12364 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12366 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12367 vec_compare
= vec_compare_name
;
12370 if (must_invert_cmp_result
)
12372 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12373 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12376 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12377 vec_compare
= vec_compare_name
;
12380 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12381 vectype
, OPTIMIZE_FOR_SPEED
))
12385 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
12386 vec_num
* ncopies
, vectype
, i
, 1);
12387 signed char biasval
12388 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
12389 bias
= build_int_cst (intQI_type_node
, biasval
);
12393 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
12394 bias
= build_int_cst (intQI_type_node
, 0);
12400 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
12402 tree tmp2
= make_ssa_name (vec_cmp_type
);
12404 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
12406 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
12407 vec_compare
= tmp2
;
12412 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12414 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
12415 tree lhs
= gimple_get_lhs (old_stmt
);
12417 new_stmt
= gimple_build_call_internal
12418 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, else_clause
, vec_compare
,
12419 vec_then_clause
, len
, bias
);
12421 new_stmt
= gimple_build_call_internal
12422 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
12424 gimple_call_set_lhs (new_stmt
, lhs
);
12425 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
12426 if (old_stmt
== gsi_stmt (*gsi
))
12427 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
12430 /* In this case we're moving the definition to later in the
12431 block. That doesn't matter because the only uses of the
12432 lhs are in phi statements. */
12433 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12434 gsi_remove (&old_gsi
, true);
12435 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12440 new_temp
= make_ssa_name (vec_dest
);
12441 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12442 vec_then_clause
, vec_else_clause
);
12443 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12446 slp_node
->push_vec_def (new_stmt
);
12448 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12452 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12454 vec_oprnds0
.release ();
12455 vec_oprnds1
.release ();
12456 vec_oprnds2
.release ();
12457 vec_oprnds3
.release ();
12462 /* Helper of vectorizable_comparison.
12464 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12465 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12466 comparison, put it in VEC_STMT, and insert it at GSI.
12468 Return true if STMT_INFO is vectorizable in this way. */
12471 vectorizable_comparison_1 (vec_info
*vinfo
, tree vectype
,
12472 stmt_vec_info stmt_info
, tree_code code
,
12473 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12474 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12476 tree lhs
, rhs1
, rhs2
;
12477 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12478 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12480 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12481 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12483 poly_uint64 nunits
;
12485 enum tree_code bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12487 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12488 vec
<tree
> vec_oprnds0
= vNULL
;
12489 vec
<tree
> vec_oprnds1
= vNULL
;
12493 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12496 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12499 mask_type
= vectype
;
12500 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12505 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12507 gcc_assert (ncopies
>= 1);
12509 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12512 slp_tree slp_rhs1
, slp_rhs2
;
12513 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12514 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12517 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12518 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12521 if (vectype1
&& vectype2
12522 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12523 TYPE_VECTOR_SUBPARTS (vectype2
)))
12526 vectype
= vectype1
? vectype1
: vectype2
;
12528 /* Invariant comparison. */
12531 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12532 vectype
= mask_type
;
12534 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12536 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12539 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12542 /* Can't compare mask and non-mask types. */
12543 if (vectype1
&& vectype2
12544 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
12547 /* Boolean values may have another representation in vectors
12548 and therefore we prefer bit operations over comparison for
12549 them (which also works for scalar masks). We store opcodes
12550 to use in bitop1 and bitop2. Statement is vectorized as
12551 BITOP2 (rhs1 BITOP1 rhs2) or
12552 rhs1 BITOP2 (BITOP1 rhs2)
12553 depending on bitop1 and bitop2 arity. */
12554 bool swap_p
= false;
12555 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12557 if (code
== GT_EXPR
)
12559 bitop1
= BIT_NOT_EXPR
;
12560 bitop2
= BIT_AND_EXPR
;
12562 else if (code
== GE_EXPR
)
12564 bitop1
= BIT_NOT_EXPR
;
12565 bitop2
= BIT_IOR_EXPR
;
12567 else if (code
== LT_EXPR
)
12569 bitop1
= BIT_NOT_EXPR
;
12570 bitop2
= BIT_AND_EXPR
;
12573 else if (code
== LE_EXPR
)
12575 bitop1
= BIT_NOT_EXPR
;
12576 bitop2
= BIT_IOR_EXPR
;
12581 bitop1
= BIT_XOR_EXPR
;
12582 if (code
== EQ_EXPR
)
12583 bitop2
= BIT_NOT_EXPR
;
12589 if (bitop1
== NOP_EXPR
)
12591 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12596 machine_mode mode
= TYPE_MODE (vectype
);
12599 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12600 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12603 if (bitop2
!= NOP_EXPR
)
12605 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12606 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12611 /* Put types on constant and invariant SLP children. */
12613 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12614 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12616 if (dump_enabled_p ())
12617 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12618 "incompatible vector types for invariants\n");
12622 vect_model_simple_cost (vinfo
, stmt_info
,
12623 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12624 dts
, ndts
, slp_node
, cost_vec
);
12631 lhs
= gimple_assign_lhs (STMT_VINFO_STMT (stmt_info
));
12632 mask
= vect_create_destination_var (lhs
, mask_type
);
12634 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12635 rhs1
, &vec_oprnds0
, vectype
,
12636 rhs2
, &vec_oprnds1
, vectype
);
12638 std::swap (vec_oprnds0
, vec_oprnds1
);
12640 /* Arguments are ready. Create the new vector stmt. */
12641 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12644 vec_rhs2
= vec_oprnds1
[i
];
12646 new_temp
= make_ssa_name (mask
);
12647 if (bitop1
== NOP_EXPR
)
12649 new_stmt
= gimple_build_assign (new_temp
, code
,
12650 vec_rhs1
, vec_rhs2
);
12651 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12655 if (bitop1
== BIT_NOT_EXPR
)
12656 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12658 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12660 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12661 if (bitop2
!= NOP_EXPR
)
12663 tree res
= make_ssa_name (mask
);
12664 if (bitop2
== BIT_NOT_EXPR
)
12665 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12667 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12669 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12673 slp_node
->push_vec_def (new_stmt
);
12675 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12679 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12681 vec_oprnds0
.release ();
12682 vec_oprnds1
.release ();
12687 /* vectorizable_comparison.
12689 Check if STMT_INFO is comparison expression that can be vectorized.
12690 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12691 comparison, put it in VEC_STMT, and insert it at GSI.
12693 Return true if STMT_INFO is vectorizable in this way. */
12696 vectorizable_comparison (vec_info
*vinfo
,
12697 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12699 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12701 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12703 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12706 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12709 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12713 enum tree_code code
= gimple_assign_rhs_code (stmt
);
12714 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12715 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12716 vec_stmt
, slp_node
, cost_vec
))
12720 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
12725 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12726 can handle all live statements in the node. Otherwise return true
12727 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12728 VEC_STMT_P is as for vectorizable_live_operation. */
12731 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12732 slp_tree slp_node
, slp_instance slp_node_instance
,
12734 stmt_vector_for_cost
*cost_vec
)
12738 stmt_vec_info slp_stmt_info
;
12740 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
12742 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
12743 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
12744 slp_node_instance
, i
,
12745 vec_stmt_p
, cost_vec
))
12749 else if (STMT_VINFO_LIVE_P (stmt_info
)
12750 && !vectorizable_live_operation (vinfo
, stmt_info
,
12751 slp_node
, slp_node_instance
, -1,
12752 vec_stmt_p
, cost_vec
))
12758 /* Make sure the statement is vectorizable. */
12761 vect_analyze_stmt (vec_info
*vinfo
,
12762 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
12763 slp_tree node
, slp_instance node_instance
,
12764 stmt_vector_for_cost
*cost_vec
)
12766 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12767 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
12769 gimple_seq pattern_def_seq
;
12771 if (dump_enabled_p ())
12772 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
12775 if (gimple_has_volatile_ops (stmt_info
->stmt
))
12776 return opt_result::failure_at (stmt_info
->stmt
,
12778 " stmt has volatile operands: %G\n",
12781 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12783 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
12785 gimple_stmt_iterator si
;
12787 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
12789 stmt_vec_info pattern_def_stmt_info
12790 = vinfo
->lookup_stmt (gsi_stmt (si
));
12791 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
12792 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
12794 /* Analyze def stmt of STMT if it's a pattern stmt. */
12795 if (dump_enabled_p ())
12796 dump_printf_loc (MSG_NOTE
, vect_location
,
12797 "==> examining pattern def statement: %G",
12798 pattern_def_stmt_info
->stmt
);
12801 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
12802 need_to_vectorize
, node
, node_instance
,
12810 /* Skip stmts that do not need to be vectorized. In loops this is expected
12812 - the COND_EXPR which is the loop exit condition
12813 - any LABEL_EXPRs in the loop
12814 - computations that are used only for array indexing or loop control.
12815 In basic blocks we only analyze statements that are a part of some SLP
12816 instance, therefore, all the statements are relevant.
12818 Pattern statement needs to be analyzed instead of the original statement
12819 if the original statement is not relevant. Otherwise, we analyze both
12820 statements. In basic blocks we are called from some SLP instance
12821 traversal, don't analyze pattern stmts instead, the pattern stmts
12822 already will be part of SLP instance. */
12824 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
12825 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
12826 && !STMT_VINFO_LIVE_P (stmt_info
))
12828 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12829 && pattern_stmt_info
12830 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12831 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12833 /* Analyze PATTERN_STMT instead of the original stmt. */
12834 stmt_info
= pattern_stmt_info
;
12835 if (dump_enabled_p ())
12836 dump_printf_loc (MSG_NOTE
, vect_location
,
12837 "==> examining pattern statement: %G",
12842 if (dump_enabled_p ())
12843 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
12845 return opt_result::success ();
12848 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12850 && pattern_stmt_info
12851 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12852 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12854 /* Analyze PATTERN_STMT too. */
12855 if (dump_enabled_p ())
12856 dump_printf_loc (MSG_NOTE
, vect_location
,
12857 "==> examining pattern statement: %G",
12858 pattern_stmt_info
->stmt
);
12861 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
12862 node_instance
, cost_vec
);
12867 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
12869 case vect_internal_def
:
12872 case vect_reduction_def
:
12873 case vect_nested_cycle
:
12874 gcc_assert (!bb_vinfo
12875 && (relevance
== vect_used_in_outer
12876 || relevance
== vect_used_in_outer_by_reduction
12877 || relevance
== vect_used_by_reduction
12878 || relevance
== vect_unused_in_scope
12879 || relevance
== vect_used_only_live
));
12882 case vect_induction_def
:
12883 case vect_first_order_recurrence
:
12884 gcc_assert (!bb_vinfo
);
12887 case vect_constant_def
:
12888 case vect_external_def
:
12889 case vect_unknown_def_type
:
12891 gcc_unreachable ();
12894 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12896 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
12898 if (STMT_VINFO_RELEVANT_P (stmt_info
))
12900 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
12901 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
12902 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
12903 *need_to_vectorize
= true;
12906 if (PURE_SLP_STMT (stmt_info
) && !node
)
12908 if (dump_enabled_p ())
12909 dump_printf_loc (MSG_NOTE
, vect_location
,
12910 "handled only by SLP analysis\n");
12911 return opt_result::success ();
12916 && (STMT_VINFO_RELEVANT_P (stmt_info
)
12917 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
12918 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12919 -mveclibabi= takes preference over library functions with
12920 the simd attribute. */
12921 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12922 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
12924 || vectorizable_conversion (vinfo
, stmt_info
,
12925 NULL
, NULL
, node
, cost_vec
)
12926 || vectorizable_operation (vinfo
, stmt_info
,
12927 NULL
, NULL
, node
, cost_vec
)
12928 || vectorizable_assignment (vinfo
, stmt_info
,
12929 NULL
, NULL
, node
, cost_vec
)
12930 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12931 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12932 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12933 node
, node_instance
, cost_vec
)
12934 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12935 NULL
, node
, cost_vec
)
12936 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12937 || vectorizable_condition (vinfo
, stmt_info
,
12938 NULL
, NULL
, node
, cost_vec
)
12939 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12941 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
12942 stmt_info
, NULL
, node
)
12943 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
12944 stmt_info
, NULL
, node
, cost_vec
));
12948 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12949 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
12950 NULL
, NULL
, node
, cost_vec
)
12951 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
12953 || vectorizable_shift (vinfo
, stmt_info
,
12954 NULL
, NULL
, node
, cost_vec
)
12955 || vectorizable_operation (vinfo
, stmt_info
,
12956 NULL
, NULL
, node
, cost_vec
)
12957 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
12959 || vectorizable_load (vinfo
, stmt_info
,
12960 NULL
, NULL
, node
, cost_vec
)
12961 || vectorizable_store (vinfo
, stmt_info
,
12962 NULL
, NULL
, node
, cost_vec
)
12963 || vectorizable_condition (vinfo
, stmt_info
,
12964 NULL
, NULL
, node
, cost_vec
)
12965 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12967 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
12971 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
12974 return opt_result::failure_at (stmt_info
->stmt
,
12976 " relevant stmt not supported: %G",
12979 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12980 need extra handling, except for vectorizable reductions. */
12982 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
12983 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
12984 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
12985 stmt_info
, node
, node_instance
,
12987 return opt_result::failure_at (stmt_info
->stmt
,
12989 " live stmt not supported: %G",
12992 return opt_result::success ();
12996 /* Function vect_transform_stmt.
12998 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13001 vect_transform_stmt (vec_info
*vinfo
,
13002 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
13003 slp_tree slp_node
, slp_instance slp_node_instance
)
13005 bool is_store
= false;
13006 gimple
*vec_stmt
= NULL
;
13009 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
13011 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13013 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
13015 switch (STMT_VINFO_TYPE (stmt_info
))
13017 case type_demotion_vec_info_type
:
13018 case type_promotion_vec_info_type
:
13019 case type_conversion_vec_info_type
:
13020 done
= vectorizable_conversion (vinfo
, stmt_info
,
13021 gsi
, &vec_stmt
, slp_node
, NULL
);
13025 case induc_vec_info_type
:
13026 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
13027 stmt_info
, &vec_stmt
, slp_node
,
13032 case shift_vec_info_type
:
13033 done
= vectorizable_shift (vinfo
, stmt_info
,
13034 gsi
, &vec_stmt
, slp_node
, NULL
);
13038 case op_vec_info_type
:
13039 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13044 case assignment_vec_info_type
:
13045 done
= vectorizable_assignment (vinfo
, stmt_info
,
13046 gsi
, &vec_stmt
, slp_node
, NULL
);
13050 case load_vec_info_type
:
13051 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13056 case store_vec_info_type
:
13057 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
13059 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
13060 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
13061 /* In case of interleaving, the whole chain is vectorized when the
13062 last store in the chain is reached. Store stmts before the last
13063 one are skipped, and there vec_stmt_info shouldn't be freed
13068 done
= vectorizable_store (vinfo
, stmt_info
,
13069 gsi
, &vec_stmt
, slp_node
, NULL
);
13075 case condition_vec_info_type
:
13076 done
= vectorizable_condition (vinfo
, stmt_info
,
13077 gsi
, &vec_stmt
, slp_node
, NULL
);
13081 case comparison_vec_info_type
:
13082 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13087 case call_vec_info_type
:
13088 done
= vectorizable_call (vinfo
, stmt_info
,
13089 gsi
, &vec_stmt
, slp_node
, NULL
);
13092 case call_simd_clone_vec_info_type
:
13093 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13097 case reduc_vec_info_type
:
13098 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13099 gsi
, &vec_stmt
, slp_node
);
13103 case cycle_phi_info_type
:
13104 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13105 &vec_stmt
, slp_node
, slp_node_instance
);
13109 case lc_phi_info_type
:
13110 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13111 stmt_info
, &vec_stmt
, slp_node
);
13115 case recurr_info_type
:
13116 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13117 stmt_info
, &vec_stmt
, slp_node
, NULL
);
13121 case phi_info_type
:
13122 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
13127 if (!STMT_VINFO_LIVE_P (stmt_info
))
13129 if (dump_enabled_p ())
13130 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13131 "stmt not supported.\n");
13132 gcc_unreachable ();
13137 if (!slp_node
&& vec_stmt
)
13138 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
13140 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
13142 /* Handle stmts whose DEF is used outside the loop-nest that is
13143 being vectorized. */
13144 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
13145 slp_node_instance
, true, NULL
);
13150 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13156 /* Remove a group of stores (for SLP or interleaving), free their
13160 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
13162 stmt_vec_info next_stmt_info
= first_stmt_info
;
13164 while (next_stmt_info
)
13166 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
13167 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
13168 /* Free the attached stmt_vec_info and remove the stmt. */
13169 vinfo
->remove_stmt (next_stmt_info
);
13170 next_stmt_info
= tmp
;
13174 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13175 elements of type SCALAR_TYPE, or null if the target doesn't support
13178 If NUNITS is zero, return a vector type that contains elements of
13179 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13181 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13182 for this vectorization region and want to "autodetect" the best choice.
13183 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13184 and we want the new type to be interoperable with it. PREVAILING_MODE
13185 in this case can be a scalar integer mode or a vector mode; when it
13186 is a vector mode, the function acts like a tree-level version of
13187 related_vector_mode. */
13190 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
13191 tree scalar_type
, poly_uint64 nunits
)
13193 tree orig_scalar_type
= scalar_type
;
13194 scalar_mode inner_mode
;
13195 machine_mode simd_mode
;
13198 if ((!INTEGRAL_TYPE_P (scalar_type
)
13199 && !POINTER_TYPE_P (scalar_type
)
13200 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
13201 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
13202 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
13205 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
13207 /* Interoperability between modes requires one to be a constant multiple
13208 of the other, so that the number of vectors required for each operation
13209 is a compile-time constant. */
13210 if (prevailing_mode
!= VOIDmode
13211 && !constant_multiple_p (nunits
* nbytes
,
13212 GET_MODE_SIZE (prevailing_mode
))
13213 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
13217 /* For vector types of elements whose mode precision doesn't
13218 match their types precision we use a element type of mode
13219 precision. The vectorization routines will have to make sure
13220 they support the proper result truncation/extension.
13221 We also make sure to build vector types with INTEGER_TYPE
13222 component type only. */
13223 if (INTEGRAL_TYPE_P (scalar_type
)
13224 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
13225 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
13226 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
13227 TYPE_UNSIGNED (scalar_type
));
13229 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13230 When the component mode passes the above test simply use a type
13231 corresponding to that mode. The theory is that any use that
13232 would cause problems with this will disable vectorization anyway. */
13233 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
13234 && !INTEGRAL_TYPE_P (scalar_type
))
13235 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
13237 /* We can't build a vector type of elements with alignment bigger than
13239 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
13240 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
13241 TYPE_UNSIGNED (scalar_type
));
13243 /* If we felt back to using the mode fail if there was
13244 no scalar type for it. */
13245 if (scalar_type
== NULL_TREE
)
13248 /* If no prevailing mode was supplied, use the mode the target prefers.
13249 Otherwise lookup a vector mode based on the prevailing mode. */
13250 if (prevailing_mode
== VOIDmode
)
13252 gcc_assert (known_eq (nunits
, 0U));
13253 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
13254 if (SCALAR_INT_MODE_P (simd_mode
))
13256 /* Traditional behavior is not to take the integer mode
13257 literally, but simply to use it as a way of determining
13258 the vector size. It is up to mode_for_vector to decide
13259 what the TYPE_MODE should be.
13261 Note that nunits == 1 is allowed in order to support single
13262 element vector types. */
13263 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
13264 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13268 else if (SCALAR_INT_MODE_P (prevailing_mode
)
13269 || !related_vector_mode (prevailing_mode
,
13270 inner_mode
, nunits
).exists (&simd_mode
))
13272 /* Fall back to using mode_for_vector, mostly in the hope of being
13273 able to use an integer mode. */
13274 if (known_eq (nunits
, 0U)
13275 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
13278 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13282 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
13284 /* In cases where the mode was chosen by mode_for_vector, check that
13285 the target actually supports the chosen mode, or that it at least
13286 allows the vector mode to be replaced by a like-sized integer. */
13287 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
13288 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
13291 /* Re-attach the address-space qualifier if we canonicalized the scalar
13293 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
13294 return build_qualified_type
13295 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
13300 /* Function get_vectype_for_scalar_type.
13302 Returns the vector type corresponding to SCALAR_TYPE as supported
13303 by the target. If GROUP_SIZE is nonzero and we're performing BB
13304 vectorization, make sure that the number of elements in the vector
13305 is no bigger than GROUP_SIZE. */
13308 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13309 unsigned int group_size
)
13311 /* For BB vectorization, we should always have a group size once we've
13312 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13313 are tentative requests during things like early data reference
13314 analysis and pattern recognition. */
13315 if (is_a
<bb_vec_info
> (vinfo
))
13316 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
13320 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13322 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
13323 vinfo
->vector_mode
= TYPE_MODE (vectype
);
13325 /* Register the natural choice of vector type, before the group size
13326 has been applied. */
13328 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
13330 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13331 try again with an explicit number of elements. */
13334 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
13336 /* Start with the biggest number of units that fits within
13337 GROUP_SIZE and halve it until we find a valid vector type.
13338 Usually either the first attempt will succeed or all will
13339 fail (in the latter case because GROUP_SIZE is too small
13340 for the target), but it's possible that a target could have
13341 a hole between supported vector types.
13343 If GROUP_SIZE is not a power of 2, this has the effect of
13344 trying the largest power of 2 that fits within the group,
13345 even though the group is not a multiple of that vector size.
13346 The BB vectorizer will then try to carve up the group into
13348 unsigned int nunits
= 1 << floor_log2 (group_size
);
13351 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13352 scalar_type
, nunits
);
13355 while (nunits
> 1 && !vectype
);
13361 /* Return the vector type corresponding to SCALAR_TYPE as supported
13362 by the target. NODE, if nonnull, is the SLP tree node that will
13363 use the returned vector type. */
13366 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
13368 unsigned int group_size
= 0;
13370 group_size
= SLP_TREE_LANES (node
);
13371 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13374 /* Function get_mask_type_for_scalar_type.
13376 Returns the mask type corresponding to a result of comparison
13377 of vectors of specified SCALAR_TYPE as supported by target.
13378 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13379 make sure that the number of elements in the vector is no bigger
13380 than GROUP_SIZE. */
13383 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13384 unsigned int group_size
)
13386 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13391 return truth_type_for (vectype
);
13394 /* Function get_mask_type_for_scalar_type.
13396 Returns the mask type corresponding to a result of comparison
13397 of vectors of specified SCALAR_TYPE as supported by target.
13398 NODE, if nonnull, is the SLP tree node that will use the returned
13402 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13405 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, node
);
13410 return truth_type_for (vectype
);
13413 /* Function get_same_sized_vectype
13415 Returns a vector type corresponding to SCALAR_TYPE of size
13416 VECTOR_TYPE if supported by the target. */
13419 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
13421 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
13422 return truth_type_for (vector_type
);
13424 poly_uint64 nunits
;
13425 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
13426 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
13429 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
13430 scalar_type
, nunits
);
13433 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13434 would not change the chosen vector modes. */
13437 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
13439 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
13440 i
!= vinfo
->used_vector_modes
.end (); ++i
)
13441 if (!VECTOR_MODE_P (*i
)
13442 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
13447 /* Function vect_is_simple_use.
13450 VINFO - the vect info of the loop or basic block that is being vectorized.
13451 OPERAND - operand in the loop or bb.
13453 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13454 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13455 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13456 the definition could be anywhere in the function
13457 DT - the type of definition
13459 Returns whether a stmt with OPERAND can be vectorized.
13460 For loops, supportable operands are constants, loop invariants, and operands
13461 that are defined by the current iteration of the loop. Unsupportable
13462 operands are those that are defined by a previous iteration of the loop (as
13463 is the case in reduction/induction computations).
13464 For basic blocks, supportable operands are constants and bb invariants.
13465 For now, operands defined outside the basic block are not supported. */
13468 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13469 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
13471 if (def_stmt_info_out
)
13472 *def_stmt_info_out
= NULL
;
13474 *def_stmt_out
= NULL
;
13475 *dt
= vect_unknown_def_type
;
13477 if (dump_enabled_p ())
13479 dump_printf_loc (MSG_NOTE
, vect_location
,
13480 "vect_is_simple_use: operand ");
13481 if (TREE_CODE (operand
) == SSA_NAME
13482 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
13483 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
13485 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
13488 if (CONSTANT_CLASS_P (operand
))
13489 *dt
= vect_constant_def
;
13490 else if (is_gimple_min_invariant (operand
))
13491 *dt
= vect_external_def
;
13492 else if (TREE_CODE (operand
) != SSA_NAME
)
13493 *dt
= vect_unknown_def_type
;
13494 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
13495 *dt
= vect_external_def
;
13498 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
13499 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
13501 *dt
= vect_external_def
;
13504 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
13505 def_stmt
= stmt_vinfo
->stmt
;
13506 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
13507 if (def_stmt_info_out
)
13508 *def_stmt_info_out
= stmt_vinfo
;
13511 *def_stmt_out
= def_stmt
;
13514 if (dump_enabled_p ())
13516 dump_printf (MSG_NOTE
, ", type of def: ");
13519 case vect_uninitialized_def
:
13520 dump_printf (MSG_NOTE
, "uninitialized\n");
13522 case vect_constant_def
:
13523 dump_printf (MSG_NOTE
, "constant\n");
13525 case vect_external_def
:
13526 dump_printf (MSG_NOTE
, "external\n");
13528 case vect_internal_def
:
13529 dump_printf (MSG_NOTE
, "internal\n");
13531 case vect_induction_def
:
13532 dump_printf (MSG_NOTE
, "induction\n");
13534 case vect_reduction_def
:
13535 dump_printf (MSG_NOTE
, "reduction\n");
13537 case vect_double_reduction_def
:
13538 dump_printf (MSG_NOTE
, "double reduction\n");
13540 case vect_nested_cycle
:
13541 dump_printf (MSG_NOTE
, "nested cycle\n");
13543 case vect_first_order_recurrence
:
13544 dump_printf (MSG_NOTE
, "first order recurrence\n");
13546 case vect_unknown_def_type
:
13547 dump_printf (MSG_NOTE
, "unknown\n");
13552 if (*dt
== vect_unknown_def_type
)
13554 if (dump_enabled_p ())
13555 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13556 "Unsupported pattern.\n");
13563 /* Function vect_is_simple_use.
13565 Same as vect_is_simple_use but also determines the vector operand
13566 type of OPERAND and stores it to *VECTYPE. If the definition of
13567 OPERAND is vect_uninitialized_def, vect_constant_def or
13568 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13569 is responsible to compute the best suited vector type for the
13573 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13574 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
13575 gimple
**def_stmt_out
)
13577 stmt_vec_info def_stmt_info
;
13579 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
13583 *def_stmt_out
= def_stmt
;
13584 if (def_stmt_info_out
)
13585 *def_stmt_info_out
= def_stmt_info
;
13587 /* Now get a vector type if the def is internal, otherwise supply
13588 NULL_TREE and leave it up to the caller to figure out a proper
13589 type for the use stmt. */
13590 if (*dt
== vect_internal_def
13591 || *dt
== vect_induction_def
13592 || *dt
== vect_reduction_def
13593 || *dt
== vect_double_reduction_def
13594 || *dt
== vect_nested_cycle
13595 || *dt
== vect_first_order_recurrence
)
13597 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
13598 gcc_assert (*vectype
!= NULL_TREE
);
13599 if (dump_enabled_p ())
13600 dump_printf_loc (MSG_NOTE
, vect_location
,
13601 "vect_is_simple_use: vectype %T\n", *vectype
);
13603 else if (*dt
== vect_uninitialized_def
13604 || *dt
== vect_constant_def
13605 || *dt
== vect_external_def
)
13606 *vectype
= NULL_TREE
;
13608 gcc_unreachable ();
13613 /* Function vect_is_simple_use.
13615 Same as vect_is_simple_use but determines the operand by operand
13616 position OPERAND from either STMT or SLP_NODE, filling in *OP
13617 and *SLP_DEF (when SLP_NODE is not NULL). */
13620 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
13621 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
13622 enum vect_def_type
*dt
,
13623 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
13627 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
13629 *vectype
= SLP_TREE_VECTYPE (child
);
13630 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
13632 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
13633 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
13637 if (def_stmt_info_out
)
13638 *def_stmt_info_out
= NULL
;
13639 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
13640 *dt
= SLP_TREE_DEF_TYPE (child
);
13647 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
13649 if (gimple_assign_rhs_code (ass
) == COND_EXPR
13650 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
13653 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
13655 *op
= gimple_op (ass
, operand
);
13657 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
13658 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
13660 *op
= gimple_op (ass
, operand
+ 1);
13662 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
13663 *op
= gimple_call_arg (call
, operand
);
13665 gcc_unreachable ();
13666 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
13670 /* If OP is not NULL and is external or constant update its vector
13671 type with VECTYPE. Returns true if successful or false if not,
13672 for example when conflicting vector types are present. */
13675 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
13677 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
13679 if (SLP_TREE_VECTYPE (op
))
13680 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
13681 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13682 should be handled by patters. Allow vect_constant_def for now. */
13683 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
13684 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
13686 SLP_TREE_VECTYPE (op
) = vectype
;
13690 /* Function supportable_widening_operation
13692 Check whether an operation represented by the code CODE is a
13693 widening operation that is supported by the target platform in
13694 vector form (i.e., when operating on arguments of type VECTYPE_IN
13695 producing a result of type VECTYPE_OUT).
13697 Widening operations we currently support are NOP (CONVERT), FLOAT,
13698 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13699 are supported by the target platform either directly (via vector
13700 tree-codes), or via target builtins.
13703 - CODE1 and CODE2 are codes of vector operations to be used when
13704 vectorizing the operation, if available.
13705 - MULTI_STEP_CVT determines the number of required intermediate steps in
13706 case of multi-step conversion (like char->short->int - in that case
13707 MULTI_STEP_CVT will be 1).
13708 - INTERM_TYPES contains the intermediate type required to perform the
13709 widening operation (short in the above example). */
13712 supportable_widening_operation (vec_info
*vinfo
,
13714 stmt_vec_info stmt_info
,
13715 tree vectype_out
, tree vectype_in
,
13716 code_helper
*code1
,
13717 code_helper
*code2
,
13718 int *multi_step_cvt
,
13719 vec
<tree
> *interm_types
)
13721 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
13722 class loop
*vect_loop
= NULL
;
13723 machine_mode vec_mode
;
13724 enum insn_code icode1
, icode2
;
13725 optab optab1
= unknown_optab
, optab2
= unknown_optab
;
13726 tree vectype
= vectype_in
;
13727 tree wide_vectype
= vectype_out
;
13728 tree_code c1
= MAX_TREE_CODES
, c2
= MAX_TREE_CODES
;
13730 tree prev_type
, intermediate_type
;
13731 machine_mode intermediate_mode
, prev_mode
;
13732 optab optab3
, optab4
;
13734 *multi_step_cvt
= 0;
13736 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
13738 switch (code
.safe_as_tree_code ())
13740 case MAX_TREE_CODES
:
13741 /* Don't set c1 and c2 if code is not a tree_code. */
13744 case WIDEN_MULT_EXPR
:
13745 /* The result of a vectorized widening operation usually requires
13746 two vectors (because the widened results do not fit into one vector).
13747 The generated vector results would normally be expected to be
13748 generated in the same order as in the original scalar computation,
13749 i.e. if 8 results are generated in each vector iteration, they are
13750 to be organized as follows:
13751 vect1: [res1,res2,res3,res4],
13752 vect2: [res5,res6,res7,res8].
13754 However, in the special case that the result of the widening
13755 operation is used in a reduction computation only, the order doesn't
13756 matter (because when vectorizing a reduction we change the order of
13757 the computation). Some targets can take advantage of this and
13758 generate more efficient code. For example, targets like Altivec,
13759 that support widen_mult using a sequence of {mult_even,mult_odd}
13760 generate the following vectors:
13761 vect1: [res1,res3,res5,res7],
13762 vect2: [res2,res4,res6,res8].
13764 When vectorizing outer-loops, we execute the inner-loop sequentially
13765 (each vectorized inner-loop iteration contributes to VF outer-loop
13766 iterations in parallel). We therefore don't allow to change the
13767 order of the computation in the inner-loop during outer-loop
13769 /* TODO: Another case in which order doesn't *really* matter is when we
13770 widen and then contract again, e.g. (short)((int)x * y >> 8).
13771 Normally, pack_trunc performs an even/odd permute, whereas the
13772 repack from an even/odd expansion would be an interleave, which
13773 would be significantly simpler for e.g. AVX2. */
13774 /* In any case, in order to avoid duplicating the code below, recurse
13775 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13776 are properly set up for the caller. If we fail, we'll continue with
13777 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13779 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
13780 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
13781 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
13782 stmt_info
, vectype_out
,
13784 code2
, multi_step_cvt
,
13787 /* Elements in a vector with vect_used_by_reduction property cannot
13788 be reordered if the use chain with this property does not have the
13789 same operation. One such an example is s += a * b, where elements
13790 in a and b cannot be reordered. Here we check if the vector defined
13791 by STMT is only directly used in the reduction statement. */
13792 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
13793 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
13795 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
13798 c1
= VEC_WIDEN_MULT_LO_EXPR
;
13799 c2
= VEC_WIDEN_MULT_HI_EXPR
;
13802 case DOT_PROD_EXPR
:
13803 c1
= DOT_PROD_EXPR
;
13804 c2
= DOT_PROD_EXPR
;
13812 case VEC_WIDEN_MULT_EVEN_EXPR
:
13813 /* Support the recursion induced just above. */
13814 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
13815 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
13818 case WIDEN_LSHIFT_EXPR
:
13819 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
13820 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
13824 c1
= VEC_UNPACK_LO_EXPR
;
13825 c2
= VEC_UNPACK_HI_EXPR
;
13829 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
13830 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
13833 case FIX_TRUNC_EXPR
:
13834 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
13835 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
13839 gcc_unreachable ();
13842 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
13843 std::swap (c1
, c2
);
13845 if (code
== FIX_TRUNC_EXPR
)
13847 /* The signedness is determined from output operand. */
13848 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
13849 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
13851 else if (CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ())
13852 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
13853 && VECTOR_BOOLEAN_TYPE_P (vectype
)
13854 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
13855 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
13857 /* If the input and result modes are the same, a different optab
13858 is needed where we pass in the number of units in vectype. */
13859 optab1
= vec_unpacks_sbool_lo_optab
;
13860 optab2
= vec_unpacks_sbool_hi_optab
;
13863 vec_mode
= TYPE_MODE (vectype
);
13864 if (widening_fn_p (code
))
13866 /* If this is an internal fn then we must check whether the target
13867 supports either a low-high split or an even-odd split. */
13868 internal_fn ifn
= as_internal_fn ((combined_fn
) code
);
13870 internal_fn lo
, hi
, even
, odd
;
13871 lookup_hilo_internal_fn (ifn
, &lo
, &hi
);
13872 *code1
= as_combined_fn (lo
);
13873 *code2
= as_combined_fn (hi
);
13874 optab1
= direct_internal_fn_optab (lo
, {vectype
, vectype
});
13875 optab2
= direct_internal_fn_optab (hi
, {vectype
, vectype
});
13877 /* If we don't support low-high, then check for even-odd. */
13879 || (icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
13881 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
13883 lookup_evenodd_internal_fn (ifn
, &even
, &odd
);
13884 *code1
= as_combined_fn (even
);
13885 *code2
= as_combined_fn (odd
);
13886 optab1
= direct_internal_fn_optab (even
, {vectype
, vectype
});
13887 optab2
= direct_internal_fn_optab (odd
, {vectype
, vectype
});
13890 else if (code
.is_tree_code ())
13892 if (code
== FIX_TRUNC_EXPR
)
13894 /* The signedness is determined from output operand. */
13895 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
13896 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
13898 else if (CONVERT_EXPR_CODE_P ((tree_code
) code
.safe_as_tree_code ())
13899 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
13900 && VECTOR_BOOLEAN_TYPE_P (vectype
)
13901 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
13902 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
13904 /* If the input and result modes are the same, a different optab
13905 is needed where we pass in the number of units in vectype. */
13906 optab1
= vec_unpacks_sbool_lo_optab
;
13907 optab2
= vec_unpacks_sbool_hi_optab
;
13911 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
13912 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
13918 if (!optab1
|| !optab2
)
13921 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
13922 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
13926 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
13927 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
13929 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
13931 /* For scalar masks we may have different boolean
13932 vector types having the same QImode. Thus we
13933 add additional check for elements number. */
13934 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
13935 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
13939 /* Check if it's a multi-step conversion that can be done using intermediate
13942 prev_type
= vectype
;
13943 prev_mode
= vec_mode
;
13945 if (!CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ()))
13948 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13949 intermediate steps in promotion sequence. We try
13950 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
13952 interm_types
->create (MAX_INTERM_CVT_STEPS
);
13953 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
13955 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
13956 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
13958 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
13959 else if (VECTOR_MODE_P (intermediate_mode
))
13961 tree intermediate_element_type
13962 = lang_hooks
.types
.type_for_mode (GET_MODE_INNER (intermediate_mode
),
13963 TYPE_UNSIGNED (prev_type
));
13965 = build_vector_type_for_mode (intermediate_element_type
,
13966 intermediate_mode
);
13970 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
13971 TYPE_UNSIGNED (prev_type
));
13973 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
13974 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
13975 && intermediate_mode
== prev_mode
13976 && SCALAR_INT_MODE_P (prev_mode
))
13978 /* If the input and result modes are the same, a different optab
13979 is needed where we pass in the number of units in vectype. */
13980 optab3
= vec_unpacks_sbool_lo_optab
;
13981 optab4
= vec_unpacks_sbool_hi_optab
;
13985 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
13986 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
13989 if (!optab3
|| !optab4
13990 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
13991 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
13992 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
13993 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
13994 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
13995 == CODE_FOR_nothing
)
13996 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
13997 == CODE_FOR_nothing
))
14000 interm_types
->quick_push (intermediate_type
);
14001 (*multi_step_cvt
)++;
14003 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
14004 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
14006 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14008 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
14009 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
14013 prev_type
= intermediate_type
;
14014 prev_mode
= intermediate_mode
;
14017 interm_types
->release ();
14022 /* Function supportable_narrowing_operation
14024 Check whether an operation represented by the code CODE is a
14025 narrowing operation that is supported by the target platform in
14026 vector form (i.e., when operating on arguments of type VECTYPE_IN
14027 and producing a result of type VECTYPE_OUT).
14029 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14030 and FLOAT. This function checks if these operations are supported by
14031 the target platform directly via vector tree-codes.
14034 - CODE1 is the code of a vector operation to be used when
14035 vectorizing the operation, if available.
14036 - MULTI_STEP_CVT determines the number of required intermediate steps in
14037 case of multi-step conversion (like int->short->char - in that case
14038 MULTI_STEP_CVT will be 1).
14039 - INTERM_TYPES contains the intermediate type required to perform the
14040 narrowing operation (short in the above example). */
14043 supportable_narrowing_operation (code_helper code
,
14044 tree vectype_out
, tree vectype_in
,
14045 code_helper
*code1
, int *multi_step_cvt
,
14046 vec
<tree
> *interm_types
)
14048 machine_mode vec_mode
;
14049 enum insn_code icode1
;
14050 optab optab1
, interm_optab
;
14051 tree vectype
= vectype_in
;
14052 tree narrow_vectype
= vectype_out
;
14054 tree intermediate_type
, prev_type
;
14055 machine_mode intermediate_mode
, prev_mode
;
14057 unsigned HOST_WIDE_INT n_elts
;
14060 if (!code
.is_tree_code ())
14063 *multi_step_cvt
= 0;
14064 switch ((tree_code
) code
)
14067 c1
= VEC_PACK_TRUNC_EXPR
;
14068 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
14069 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14070 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
14071 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
14072 && n_elts
< BITS_PER_UNIT
)
14073 optab1
= vec_pack_sbool_trunc_optab
;
14075 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14078 case FIX_TRUNC_EXPR
:
14079 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
14080 /* The signedness is determined from output operand. */
14081 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14085 c1
= VEC_PACK_FLOAT_EXPR
;
14086 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14090 gcc_unreachable ();
14096 vec_mode
= TYPE_MODE (vectype
);
14097 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
14102 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14104 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14106 /* For scalar masks we may have different boolean
14107 vector types having the same QImode. Thus we
14108 add additional check for elements number. */
14109 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
14110 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14114 if (code
== FLOAT_EXPR
)
14117 /* Check if it's a multi-step conversion that can be done using intermediate
14119 prev_mode
= vec_mode
;
14120 prev_type
= vectype
;
14121 if (code
== FIX_TRUNC_EXPR
)
14122 uns
= TYPE_UNSIGNED (vectype_out
);
14124 uns
= TYPE_UNSIGNED (vectype
);
14126 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14127 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14128 costly than signed. */
14129 if (code
== FIX_TRUNC_EXPR
&& uns
)
14131 enum insn_code icode2
;
14134 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
14136 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14137 if (interm_optab
!= unknown_optab
14138 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
14139 && insn_data
[icode1
].operand
[0].mode
14140 == insn_data
[icode2
].operand
[0].mode
)
14143 optab1
= interm_optab
;
14148 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14149 intermediate steps in promotion sequence. We try
14150 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14151 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14152 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14154 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14155 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14157 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
14160 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
14161 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14162 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14163 && SCALAR_INT_MODE_P (prev_mode
)
14164 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
14165 && n_elts
< BITS_PER_UNIT
)
14166 interm_optab
= vec_pack_sbool_trunc_optab
;
14169 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
14172 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
14173 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14174 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
14175 == CODE_FOR_nothing
))
14178 interm_types
->quick_push (intermediate_type
);
14179 (*multi_step_cvt
)++;
14181 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14183 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14185 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
14186 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14190 prev_mode
= intermediate_mode
;
14191 prev_type
= intermediate_type
;
14192 optab1
= interm_optab
;
14195 interm_types
->release ();
14199 /* Generate and return a vector mask of MASK_TYPE such that
14200 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14201 Add the statements to SEQ. */
/* NOTE(review): extraction artifact -- braces, the "if (name)" guard and the
   final "return tmp;" visible upstream are missing from this chunk; the added
   comments describe only what is visible.  */
14204 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
14205 tree end_index
, const char *name
)
/* The comparison is done in the type of START_INDEX; the target must
   directly support IFN_WHILE_ULT for this (compare-type, mask-type) pair.  */
14207 tree cmp_type
= TREE_TYPE (start_index
);
14208 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
14209 cmp_type
, mask_type
,
14210 OPTIMIZE_FOR_SPEED
));
/* Build WHILE_ULT (start, end, zero-mask); per the comment above this
   function, lane I of the result is true iff START_INDEX + J < END_INDEX
   for all J <= I.  */
14211 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
14212 start_index
, end_index
,
14213 build_zero_cst (mask_type
));
/* Name the result SSA temporary after NAME when one is supplied (presumably
   guarded by an "if (name)" that this extraction dropped), otherwise use an
   anonymous SSA name.  */
14216 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
14218 tmp
= make_ssa_name (mask_type
);
/* Attach the result to the call and append the call to SEQ.  */
14219 gimple_call_set_lhs (call
, tmp
);
14220 gimple_seq_add_stmt (seq
, call
);
14224 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14225 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
/* Inverted form of vect_gen_while: build the WHILE_ULT mask and return its
   bitwise complement, appending all statements to SEQ.
   NOTE(review): the parameter line declaring END_INDEX was dropped by this
   extraction -- the body below clearly uses it.  */
14228 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
,
14231 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
/* Complement every lane of the generated mask.  */
14232 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
14235 /* Try to compute the vector types required to vectorize STMT_INFO,
14236 returning true on success and false if vectorization isn't possible.
14237 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14238 make sure that the number of elements in the vectors is no bigger
14243 - Set *STMT_VECTYPE_OUT to:
14244 - NULL_TREE if the statement doesn't need to be vectorized;
14245 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14247 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14248 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14249 statement does not help to determine the overall number of units. */
/* NOTE(review): extraction artifact -- braces, several early-return and
   guard lines, and the declaration of VECTYPE are missing from this chunk;
   the added comments describe only the visible logic.  */
14252 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
14253 tree
*stmt_vectype_out
,
14254 tree
*nunits_vectype_out
,
14255 unsigned int group_size
)
14257 gimple
*stmt
= stmt_info
->stmt
;
14259 /* For BB vectorization, we should always have a group size once we've
14260 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14261 are tentative requests during things like early data reference
14262 analysis and pattern recognition. */
14263 if (is_a
<bb_vec_info
> (vinfo
))
14264 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
/* Both outputs default to "no vector type needed".  */
14268 *stmt_vectype_out
= NULL_TREE
;
14269 *nunits_vectype_out
= NULL_TREE
;
/* Statements with no lhs: MASK_STORE is fine, internal-fn-less calls are
   deferred to SIMD clone analysis, anything else is rejected.  */
14271 if (gimple_get_lhs (stmt
) == NULL_TREE
14272 /* MASK_STORE has no lhs, but is ok. */
14273 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14275 if (is_a
<gcall
*> (stmt
))
14277 /* Ignore calls with no lhs. These must be calls to
14278 #pragma omp simd functions, and what vectorization factor
14279 it really needs can't be determined until
14280 vectorizable_simd_clone_call. */
14281 if (dump_enabled_p ())
14282 dump_printf_loc (MSG_NOTE
, vect_location
,
14283 "defer to SIMD clone analysis.\n");
14284 return opt_result::success ();
14287 return opt_result::failure_at (stmt
,
14288 "not vectorized: irregular stmt.%G", stmt
);
/* Determine VECTYPE: reuse a precomputed one, build a mask type from the
   recorded mask precision, or derive it from the statement's scalar type.  */
14292 tree scalar_type
= NULL_TREE
;
14293 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
14295 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
14296 if (dump_enabled_p ())
14297 dump_printf_loc (MSG_NOTE
, vect_location
,
14298 "precomputed vectype: %T\n", vectype
);
14300 else if (vect_use_mask_type_p (stmt_info
))
/* Mask-producing statements: make an unsigned integer scalar of the
   recorded precision and get the matching boolean vector type.  */
14302 unsigned int precision
= stmt_info
->mask_precision
;
14303 scalar_type
= build_nonstandard_integer_type (precision
, 1);
14304 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
14306 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
14307 " data-type %T\n", scalar_type
);
14308 if (dump_enabled_p ())
14309 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
/* Otherwise pick the scalar type from the data reference (loads/stores),
   the stored value of a MASK_STORE (arg 3), or the statement's lhs.  */
14313 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
14314 scalar_type
= TREE_TYPE (DR_REF (dr
));
14315 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14316 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
14318 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
14320 if (dump_enabled_p ())
14323 dump_printf_loc (MSG_NOTE
, vect_location
,
14324 "get vectype for scalar type (group size %d):"
14325 " %T\n", group_size
, scalar_type
);
14327 dump_printf_loc (MSG_NOTE
, vect_location
,
14328 "get vectype for scalar type: %T\n", scalar_type
);
14330 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
14332 return opt_result::failure_at (stmt
,
14334 " unsupported data-type %T\n",
14337 if (dump_enabled_p ())
14338 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
/* Statements that already operate on vectors cannot be re-vectorized.  */
14341 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
14342 return opt_result::failure_at (stmt
,
14343 "not vectorized: vector stmt in loop:%G",
14346 *stmt_vectype_out
= vectype
;
14348 /* Don't try to compute scalar types if the stmt produces a boolean
14349 vector; use the existing vector type instead. */
14350 tree nunits_vectype
= vectype
;
14351 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14353 /* The number of units is set according to the smallest scalar
14354 type (or the largest vector size, but we only support one
14355 vector size per vectorization). */
14356 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
14357 TREE_TYPE (vectype
));
/* If the statement touches a scalar type smaller than VECTYPE's element,
   derive a second vector type from it to obtain the unit count.  */
14358 if (scalar_type
!= TREE_TYPE (vectype
))
14360 if (dump_enabled_p ())
14361 dump_printf_loc (MSG_NOTE
, vect_location
,
14362 "get vectype for smallest scalar type: %T\n",
14364 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
14366 if (!nunits_vectype
)
14367 return opt_result::failure_at
14368 (stmt
, "not vectorized: unsupported data-type %T\n",
14370 if (dump_enabled_p ())
14371 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
/* The nunits type must hold a whole multiple of the stmt type's lanes.  */
14376 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
14377 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
14378 return opt_result::failure_at (stmt
,
14379 "Not vectorized: Incompatible number "
14380 "of vector subparts between %T and %T\n",
14381 nunits_vectype
, *stmt_vectype_out
);
14383 if (dump_enabled_p ())
14385 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
14386 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
14387 dump_printf (MSG_NOTE
, "\n");
14390 *nunits_vectype_out
= nunits_vectype
;
14391 return opt_result::success ();
14394 /* Generate and return statement sequence that sets vector length LEN that is:
14396 min_of_start_and_end = min (START_INDEX, END_INDEX);
14397 left_len = END_INDEX - min_of_start_and_end;
14398 rhs = min (left_len, LEN_LIMIT);
14401 Note: the cost of the code generated by this function is modeled
14402 by vect_estimate_min_profitable_iters, so changes here may need
14403 corresponding changes there. */
14406 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
14408 gimple_seq stmts
= NULL
;
14409 tree len_type
= TREE_TYPE (len
);
14410 gcc_assert (TREE_TYPE (start_index
) == len_type
);
14412 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
14413 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
14414 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
14415 gimple
* stmt
= gimple_build_assign (len
, rhs
);
14416 gimple_seq_add_stmt (&stmts
, stmt
);