1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
67 stmt_vectype (class _stmt_vec_info
*stmt_info
)
69 return STMT_VINFO_VECTYPE (stmt_info
);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
75 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
77 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
78 basic_block bb
= gimple_bb (stmt
);
79 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
85 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
87 return (bb
->loop_father
== loop
->inner
);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
95 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
96 enum vect_cost_for_stmt kind
,
97 stmt_vec_info stmt_info
, slp_tree node
,
98 tree vectype
, int misalign
,
99 enum vect_cost_model_location where
)
101 if ((kind
== vector_load
|| kind
== unaligned_load
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_gather_load
;
104 if ((kind
== vector_store
|| kind
== unaligned_store
)
105 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
106 kind
= vector_scatter_store
;
108 stmt_info_for_cost si
109 = { count
, kind
, where
, stmt_info
, node
, vectype
, misalign
};
110 body_cost_vec
->safe_push (si
);
113 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
117 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
118 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
119 tree vectype
, int misalign
,
120 enum vect_cost_model_location where
)
122 return record_stmt_cost (body_cost_vec
, count
, kind
, stmt_info
, NULL
,
123 vectype
, misalign
, where
);
127 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
128 enum vect_cost_for_stmt kind
, slp_tree node
,
129 tree vectype
, int misalign
,
130 enum vect_cost_model_location where
)
132 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, node
,
133 vectype
, misalign
, where
);
137 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
138 enum vect_cost_for_stmt kind
,
139 enum vect_cost_model_location where
)
141 gcc_assert (kind
== cond_branch_taken
|| kind
== cond_branch_not_taken
142 || kind
== scalar_stmt
);
143 return record_stmt_cost (body_cost_vec
, count
, kind
, NULL
, NULL
,
144 NULL_TREE
, 0, where
);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
150 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
152 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
162 read_vector_array (vec_info
*vinfo
,
163 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
164 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
166 tree vect_type
, vect
, vect_name
, array_ref
;
169 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
170 vect_type
= TREE_TYPE (TREE_TYPE (array
));
171 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
172 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
173 build_int_cst (size_type_node
, n
),
174 NULL_TREE
, NULL_TREE
);
176 new_stmt
= gimple_build_assign (vect
, array_ref
);
177 vect_name
= make_ssa_name (vect
, new_stmt
);
178 gimple_assign_set_lhs (new_stmt
, vect_name
);
179 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
189 write_vector_array (vec_info
*vinfo
,
190 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
191 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
196 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
197 build_int_cst (size_type_node
, n
),
198 NULL_TREE
, NULL_TREE
);
200 new_stmt
= gimple_build_assign (array_ref
, vect
);
201 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
209 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
213 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
223 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
224 gimple_stmt_iterator
*gsi
, tree var
)
226 tree clobber
= build_clobber (TREE_TYPE (var
));
227 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
228 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
238 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
239 enum vect_relevant relevant
, bool live_p
)
241 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
242 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE
, vect_location
,
246 "mark relevant %d, live %d: %G", relevant
, live_p
,
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE
, vect_location
,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info
= stmt_info
;
266 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
268 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
269 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
271 if (live_p
&& relevant
== vect_unused_in_scope
)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE
, vect_location
,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
277 relevant
= vect_used_only_live
;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE
, vect_location
,
282 "mark relevant %d, live %d: %G", relevant
, live_p
,
286 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
287 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
288 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
290 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE
, vect_location
,
295 "already marked relevant/live.\n");
299 worklist
->safe_push (stmt_info
);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
309 loop_vec_info loop_vinfo
)
314 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
318 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
320 enum vect_def_type dt
= vect_uninitialized_def
;
322 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
326 "use not simple.\n");
330 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
349 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
350 enum vect_relevant
*relevant
, bool *live_p
)
352 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
354 imm_use_iterator imm_iter
;
358 *relevant
= vect_unused_in_scope
;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info
->stmt
)
363 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
364 *relevant
= vect_used_in_scope
;
366 /* changing memory. */
367 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
368 if (gimple_vdef (stmt_info
->stmt
)
369 && !gimple_clobber_p (stmt_info
->stmt
))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE
, vect_location
,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant
= vect_used_in_scope
;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
380 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
382 basic_block bb
= gimple_bb (USE_STMT (use_p
));
383 if (!flow_bb_inside_loop_p (loop
, bb
))
385 if (is_gimple_debug (USE_STMT (use_p
)))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE
, vect_location
,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
395 gcc_assert (bb
== single_exit (loop
)->dest
);
402 if (*live_p
&& *relevant
== vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE
, vect_location
,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant
= vect_used_only_live
;
411 return (*live_p
|| *relevant
);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
421 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info
))
431 /* STMT has a data_ref. FORNOW this means that its of one of
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
445 if (!assign
|| !gimple_assign_copy_p (assign
))
447 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
448 if (call
&& gimple_call_internal_p (call
))
450 internal_fn ifn
= gimple_call_internal_fn (call
);
451 int mask_index
= internal_fn_mask_index (ifn
);
453 && use
== gimple_call_arg (call
, mask_index
))
455 int stored_value_index
= internal_fn_stored_value_index (ifn
);
456 if (stored_value_index
>= 0
457 && use
== gimple_call_arg (call
, stored_value_index
))
459 if (internal_gather_scatter_fn_p (ifn
)
460 && use
== gimple_call_arg (call
, 1))
466 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
468 operand
= gimple_assign_rhs1 (assign
);
469 if (TREE_CODE (operand
) != SSA_NAME
)
480 Function process_use.
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT cause it had already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
507 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
508 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
511 stmt_vec_info dstmt_vinfo
;
512 enum vect_def_type dt
;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
520 return opt_result::failure_at (stmt_vinfo
->stmt
,
522 " unsupported use in stmt.\n");
525 return opt_result::success ();
527 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
528 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
535 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
537 && bb
->loop_father
== def_bb
->loop_father
)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE
, vect_location
,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
553 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE
, vect_location
,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
561 case vect_unused_in_scope
:
562 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
563 vect_used_in_scope
: vect_unused_in_scope
;
566 case vect_used_in_outer_by_reduction
:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
568 relevant
= vect_used_by_reduction
;
571 case vect_used_in_outer
:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
573 relevant
= vect_used_in_scope
;
576 case vect_used_in_scope
:
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
591 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE
, vect_location
,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
599 case vect_unused_in_scope
:
600 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
602 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
605 case vect_used_by_reduction
:
606 case vect_used_only_live
:
607 relevant
= vect_used_in_outer_by_reduction
;
610 case vect_used_in_scope
:
611 relevant
= vect_used_in_outer
;
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
622 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
626 loop_latch_edge (bb
->loop_father
))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE
, vect_location
,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
660 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
661 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
662 unsigned int nbbs
= loop
->num_nodes
;
663 gimple_stmt_iterator si
;
667 enum vect_relevant relevant
;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec
<stmt_vec_info
, 64> worklist
;
673 /* 1. Init worklist. */
674 for (i
= 0; i
< nbbs
; i
++)
677 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
679 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
684 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
685 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
687 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
689 if (is_gimple_debug (gsi_stmt (si
)))
691 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE
, vect_location
,
694 "init: stmt relevant? %G", stmt_info
->stmt
);
696 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
697 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
701 /* 2. Process_worklist */
702 while (worklist
.length () > 0)
707 stmt_vec_info stmt_vinfo
= worklist
.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE
, vect_location
,
710 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
715 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
730 case vect_reduction_def
:
731 gcc_assert (relevant
!= vect_unused_in_scope
);
732 if (relevant
!= vect_unused_in_scope
733 && relevant
!= vect_used_in_scope
734 && relevant
!= vect_used_by_reduction
735 && relevant
!= vect_used_only_live
)
736 return opt_result::failure_at
737 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
740 case vect_nested_cycle
:
741 if (relevant
!= vect_unused_in_scope
742 && relevant
!= vect_used_in_outer_by_reduction
743 && relevant
!= vect_used_in_outer
)
744 return opt_result::failure_at
745 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
748 case vect_double_reduction_def
:
749 if (relevant
!= vect_unused_in_scope
750 && relevant
!= vect_used_by_reduction
751 && relevant
!= vect_used_only_live
)
752 return opt_result::failure_at
753 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
760 if (is_pattern_stmt_p (stmt_vinfo
))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
767 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
768 tree op
= gimple_assign_rhs1 (assign
);
771 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
774 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
775 loop_vinfo
, relevant
, &worklist
, false);
778 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
779 loop_vinfo
, relevant
, &worklist
, false);
784 for (; i
< gimple_num_ops (assign
); i
++)
786 op
= gimple_op (assign
, i
);
787 if (TREE_CODE (op
) == SSA_NAME
)
790 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
797 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
799 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
801 tree arg
= gimple_call_arg (call
, i
);
803 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
811 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
813 tree op
= USE_FROM_PTR (use_p
);
815 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
823 gather_scatter_info gs_info
;
824 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
827 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
848 vect_model_simple_cost (vec_info
*,
849 stmt_vec_info stmt_info
, int ncopies
,
850 enum vect_def_type
*dt
,
853 stmt_vector_for_cost
*cost_vec
,
854 vect_cost_for_stmt kind
= vector_stmt
)
856 int inside_cost
= 0, prologue_cost
= 0;
858 gcc_assert (cost_vec
!= NULL
);
860 /* ??? Somehow we need to fix this at the callers. */
862 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
868 for (int i
= 0; i
< ndts
; i
++)
869 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
870 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
871 stmt_info
, 0, vect_prologue
);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
875 stmt_info
, 0, vect_body
);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE
, vect_location
,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
894 enum vect_def_type
*dt
,
895 unsigned int ncopies
, int pwr
,
896 stmt_vector_for_cost
*cost_vec
,
900 int inside_cost
= 0, prologue_cost
= 0;
902 for (i
= 0; i
< pwr
+ 1; i
++)
904 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
906 ? vector_stmt
: vec_promote_demote
,
907 stmt_info
, 0, vect_body
);
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i
= 0; i
< 2; i
++)
913 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
914 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
915 stmt_info
, 0, vect_prologue
);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE
, vect_location
,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
923 /* Returns true if the current function returns DECL. */
926 cfun_returns (tree decl
)
930 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
932 greturn
*ret
= safe_dyn_cast
<greturn
*> (*gsi_last_bb (e
->src
));
935 if (gimple_return_retval (ret
) == decl
)
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
943 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
945 while (gimple_clobber_p (def
));
946 if (is_a
<gassign
*> (def
)
947 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
948 && gimple_assign_rhs1 (def
) == decl
)
954 /* Calculate cost of DR's memory access. */
956 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
957 dr_alignment_support alignment_support_scheme
,
959 unsigned int *inside_cost
,
960 stmt_vector_for_cost
*body_cost_vec
)
962 switch (alignment_support_scheme
)
966 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
967 vector_store
, stmt_info
, 0,
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE
, vect_location
,
972 "vect_model_store_cost: aligned.\n");
976 case dr_unaligned_supported
:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
980 unaligned_store
, stmt_info
,
981 misalignment
, vect_body
);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE
, vect_location
,
984 "vect_model_store_cost: unaligned supported by "
989 case dr_unaligned_unsupported
:
991 *inside_cost
= VECT_MAX_COST
;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
995 "vect_model_store_cost: unsupported access.\n");
1004 /* Calculate cost of DR's memory access. */
1006 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1007 dr_alignment_support alignment_support_scheme
,
1009 bool add_realign_cost
, unsigned int *inside_cost
,
1010 unsigned int *prologue_cost
,
1011 stmt_vector_for_cost
*prologue_cost_vec
,
1012 stmt_vector_for_cost
*body_cost_vec
,
1013 bool record_prologue_costs
)
1015 switch (alignment_support_scheme
)
1019 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1020 stmt_info
, 0, vect_body
);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE
, vect_location
,
1024 "vect_model_load_cost: aligned.\n");
1028 case dr_unaligned_supported
:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1032 unaligned_load
, stmt_info
,
1033 misalignment
, vect_body
);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE
, vect_location
,
1037 "vect_model_load_cost: unaligned supported by "
1042 case dr_explicit_realign
:
1044 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1045 vector_load
, stmt_info
, 0, vect_body
);
1046 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1047 vec_perm
, stmt_info
, 0, vect_body
);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1052 if (targetm
.vectorize
.builtin_mask_for_load
)
1053 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1054 stmt_info
, 0, vect_body
);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE
, vect_location
,
1058 "vect_model_load_cost: explicit realign\n");
1062 case dr_explicit_realign_optimized
:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE
, vect_location
,
1066 "vect_model_load_cost: unaligned software "
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost
&& record_prologue_costs
)
1078 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1079 vector_stmt
, stmt_info
,
1081 if (targetm
.vectorize
.builtin_mask_for_load
)
1082 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1083 vector_stmt
, stmt_info
,
1087 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1088 stmt_info
, 0, vect_body
);
1089 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1090 stmt_info
, 0, vect_body
);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE
, vect_location
,
1094 "vect_model_load_cost: explicit realign optimized"
1100 case dr_unaligned_unsupported
:
1102 *inside_cost
= VECT_MAX_COST
;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1106 "vect_model_load_cost: unsupported access.\n");
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1119 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1120 gimple_stmt_iterator
*gsi
)
1123 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1125 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE
, vect_location
,
1129 "created new init_stmt: %G", new_stmt
);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
/* Build and insert an INIT_STMT assigning TYPE'd VAL (broadcasting a
   scalar VAL to a vector when needed) and return the new SSA def.
   See the function comment above (lines 1132-1140).
   NOTE(review): extraction dropped several original lines
   (1145-1148, 1151, 1154, 1158, 1161, 1164-1165, 1170-1174, 1179,
   1185, 1189-1191, 1193-1194, 1198-1199) — braces, declarations and
   the final return; verify against upstream tree-vect-stmts.cc.  */
1143 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1144 gimple_stmt_iterator
*gsi
)
1149 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1152 gcc_assert (VECTOR_TYPE_P (type
));
1153 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type
))
1159 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1160 tree false_val
= build_zero_cst (TREE_TYPE (type
));
/* A constant boolean can be canonicalized directly ...  */
1162 if (CONSTANT_CLASS_P (val
))
1163 val
= integer_zerop (val
) ? false_val
: true_val
;
/* ... otherwise select all-ones/all-zeros at runtime via COND_EXPR.  */
1166 new_temp
= make_ssa_name (TREE_TYPE (type
));
1167 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1168 val
, true_val
, false_val
);
1169 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* Non-boolean element mismatch: view-convert non-integral values,
   then convert to the element type, emitting each produced stmt.  */
1175 gimple_seq stmts
= NULL
;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1177 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1178 TREE_TYPE (type
), val
);
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
/* Move every conversion stmt out of STMTS and insert it at the
   requested position one by one.  */
1183 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1184 !gsi_end_p (gsi2
); )
1186 init_stmt
= gsi_stmt (gsi2
);
1187 gsi_remove (&gsi2
, false);
1188 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* Splat the (now element-typed) scalar across the vector.  */
1192 val
= build_vector_from_val (type
, val
);
1195 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1196 init_stmt
= gimple_build_assign (new_temp
, val
);
1197 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
/* Collect the vector defs for operand OP of STMT_VINFO into *VEC_OPRNDS
   (see the function comment above, lines 1202-1212).
   NOTE(review): the signature's NCOPIES parameter line (original 1216)
   and several braces/declarations (e.g. for is_simple_use, def_stmt,
   vector_type) were lost in extraction — verify against upstream.  */
1215 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1217 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1220 enum vect_def_type dt
;
1222 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE
, vect_location
,
1226 "vect_get_vec_defs_for_operand: %T\n", op
);
/* Classify OP (constant, external, or defined inside the loop).  */
1228 stmt_vec_info def_stmt_info
;
1229 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1230 &def_stmt_info
, &def_stmt
);
1231 gcc_assert (is_simple_use
);
1232 if (def_stmt
&& dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1235 vec_oprnds
->create (ncopies
);
1236 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
/* Invariant operand: materialize one splat def via vect_init_vector,
   choosing the vector type from VECTYPE when given, from a mask type
   for scalar booleans, or from OP's scalar type otherwise.  */
1238 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1242 vector_type
= vectype
;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1245 vector_type
= truth_type_for (stmt_vectype
);
1247 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1249 gcc_assert (vector_type
);
1250 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1252 vec_oprnds
->quick_push (vop
);
/* Loop-defined operand: reuse the defs already recorded on the
   (pattern-adjusted) defining statement.  */
1256 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1258 for (unsigned i
= 0; i
< ncopies
; ++i
)
1259 vec_oprnds
->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1265 /* Get vectorized definitions for OP0 and OP1. */
/* Fetch vectorized defs for up to four operands OP0..OP3: from the SLP
   node's children when SLP_NODE is set, else per operand via
   vect_get_vec_defs_for_operand.
   NOTE(review): the NCOPIES parameter line (original 1269) and the
   "if (opN)" guards / braces were lost in extraction — each
   vect_get_slp_defs / vect_get_vec_defs_for_operand call is presumably
   conditional on the corresponding operand; verify upstream.  */
1268 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1270 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1271 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1272 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1273 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
/* SLP: the defs live on the node's children, in operand order.  */
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
/* Non-SLP: create/collect NCOPIES defs per present operand.  */
1289 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1290 op0
, vec_oprnds0
, vectype0
);
1292 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1293 op1
, vec_oprnds1
, vectype1
);
1295 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1296 op2
, vec_oprnds2
, vectype2
);
1298 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1299 op3
, vec_oprnds3
, vectype3
);
/* Convenience overload: same as the variant above but with no explicit
   vector types — forwards NULL_TREE for each vectype so they are
   derived from the operands.
   NOTE(review): the NCOPIES parameter line (original 1305) is missing
   from this extraction, though NCOPIES is forwarded below.  */
1304 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1306 tree op0
, vec
<tree
> *vec_oprnds0
,
1307 tree op1
, vec
<tree
> *vec_oprnds1
,
1308 tree op2
, vec
<tree
> *vec_oprnds2
,
1309 tree op3
, vec
<tree
> *vec_oprnds3
)
1311 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1312 op0
, vec_oprnds0
, NULL_TREE
,
1313 op1
, vec_oprnds1
, NULL_TREE
,
1314 op2
, vec_oprnds2
, NULL_TREE
,
1315 op3
, vec_oprnds3
, NULL_TREE
);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
/* Common tail for vect_finish_replace_stmt / vect_finish_stmt_generation:
   copy the scalar stmt's location onto VEC_STMT and keep EH region
   membership consistent (see the comment above, lines 1318-1320).  */
1323 vect_finish_stmt_generation_1 (vec_info
*,
1324 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
/* The vector stmt reports the same source location as the scalar one.  */
1331 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1337 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1338 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
/* Outside an EH landing-pad region the new stmt must not throw.  */
1341 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
/* Replace STMT_INFO's original scalar stmt in place with VEC_STMT, which
   must produce the very same LHS (see comment above, lines 1344-1346).  */
1349 vect_finish_replace_stmt (vec_info
*vinfo
,
1350 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
/* Replace the pre-pattern original stmt, not a pattern stmt.  */
1352 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1353 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1355 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1356 gsi_replace (&gsi
, vec_stmt
, true);
1358 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
/* Insert VEC_STMT before *GSI and run the common finishing step
   (see comment above, lines 1361-1362).  The bulk of the body keeps
   virtual SSA form up to date when VEC_STMT touches memory.  */
1365 vect_finish_stmt_generation (vec_info
*vinfo
,
1366 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1367 gimple_stmt_iterator
*gsi
)
1369 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
/* Only bother with virtual operands when inserting before a real stmt
   and VEC_STMT can carry memory ops.  */
1371 if (!gsi_end_p (*gsi
)
1372 && gimple_has_mem_ops (vec_stmt
))
1374 gimple
*at_stmt
= gsi_stmt (*gsi
);
1375 tree vuse
= gimple_vuse (at_stmt
);
1376 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
/* Inherit the insertion point's virtual use.  */
1378 tree vdef
= gimple_vdef (at_stmt
);
1379 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1380 gimple_set_modified (vec_stmt
, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1386 && ((is_gimple_assign (vec_stmt
)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1388 || (is_gimple_call (vec_stmt
)
1389 && (!(gimple_call_flags (vec_stmt
)
1390 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
))
1391 || (gimple_call_lhs (vec_stmt
)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt
)))))))
/* VEC_STMT is a store: give it a fresh vdef and rewire the
   following stmt's vuse to it.  */
1394 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1395 gimple_set_vdef (vec_stmt
, new_vdef
);
1396 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1400 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1401 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
/* Map a call to CFN/FNDECL onto a directly-supported internal function
   for the given input/output vector types; see comment above
   (lines 1404-1407).  NOTE(review): the returns (IFN and IFN_LAST,
   original lines ~1432/1436-1438) were lost in extraction.  */
1410 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1411 tree vectype_out
, tree vectype_in
)
1414 if (internal_fn_p (cfn
))
1415 ifn
= as_internal_fn (cfn
);
1417 ifn
= associated_internal_fn (fndecl
)
;
1418 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1420 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1421 if (info
.vectorizable
)
/* type0/type1 select which of the two vector types the optab query
   is keyed on (negative info fields mean "use the output type").  */
1423 bool same_size_p
= TYPE_SIZE (vectype_in
) == TYPE_SIZE (vectype_out
);
1424 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1425 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1427 /* The type sizes of both vectype_in and vectype_out must be
1428 exactly the same when vectype_out does not participate in the
1429 optab query.  There is no size restriction when vectype_out
1430 is part of the optab query. */
1431 if (type0
!= vectype_out
&& type1
!= vectype_out
&& !same_size_p
)
1434 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1435 OPTIMIZE_FOR_SPEED
))
1443 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1444 gimple_stmt_iterator
*);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
/* Decide whether this load/store can be done with partial vectors and
   record the required rgroup masks/lens; on failure clear
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (see comment above,
   lines 1446-1462).  NOTE(review): several original lines are missing
   from this extraction (e.g. parameter lines 1466/1468/1470/1472-1473,
   the "internal_fn ifn" declaration at 1489-1490, scalar_mask uses,
   returns and braces) — verify against upstream tree-vect-stmts.cc.  */
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1467 vec_load_store_type vls_type
,
1469 vect_memory_access_type
1471 gather_scatter_info
*gs_info
,
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type
== VMAT_INVARIANT
)
1478 unsigned int nvectors
;
1480 nvectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1482 nvectors
= vect_get_num_copies (loop_vinfo
, vectype
);
1484 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1485 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1486 machine_mode vecmode
= TYPE_MODE (vectype
);
1487 bool is_load
= (vls_type
== VLS_LOAD
);
/* Case 1: load/store-lanes — record a len or a mask depending on which
   IFN the target supports.  */
1488 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1491 = (is_load
? vect_load_lanes_supported (vectype
, group_size
, true)
1492 : vect_store_lanes_supported (vectype
, group_size
, true));
1493 if (ifn
== IFN_MASK_LEN_LOAD_LANES
|| ifn
== IFN_MASK_LEN_STORE_LANES
)
1494 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1495 else if (ifn
== IFN_MASK_LOAD_LANES
|| ifn
== IFN_MASK_STORE_LANES
)
1496 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
/* Case 2: gather/scatter — prefer the LEN variant, else the MASK one.  */
1510 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1512 internal_fn ifn
= (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE
);
1515 internal_fn len_ifn
= (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE
);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn
, vectype
,
1519 gs_info
->memory_type
,
1520 gs_info
->offset_vectype
,
1522 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1524 gs_info
->memory_type
,
1525 gs_info
->offset_vectype
,
1527 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
,
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
/* Only contiguous (possibly permuted) accesses can be masked below.  */
1541 if (memory_access_type
!= VMAT_CONTIGUOUS
1542 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1554 if (!VECTOR_MODE_P (vecmode
))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors
= [](poly_uint64 size
, poly_uint64 nunits
)
1569 unsigned int nvectors
;
1570 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1575 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1576 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1577 machine_mode mask_mode
;
/* Case 3: plain contiguous access — try len-based loads/stores first,
   then mask-based ones.  */
1579 bool using_partial_vectors_p
= false;
1580 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1582 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1583 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1584 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1585 using_partial_vectors_p
= true;
1587 else if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1588 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1590 nvectors
= group_memory_nvectors (group_size
* vf
, nunits
);
1591 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1592 using_partial_vectors_p
= true;
1595 if (!using_partial_vectors_p
)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1606 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
/* Combine the scalar-condition mask with the loop mask (see comment
   above, lines 1606-1613).  NOTE(review): the early returns for a null
   LOOP_MASK / already-masked VEC_MASK and the final return of AND_RES
   (original lines ~1620-1626, 1631-1634) are missing here.  */
1616 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1617 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1619 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1623 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
)
;
/* The pair was already ANDed once: no need to mask again.  */
1625 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
/* Emit VEC_MASK & LOOP_MASK before *GSI.  */
1628 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1629 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1630 vec_mask
, loop_mask
);
1632 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
/* Try to vectorize a strided access as a gather/scatter with a
   narrower offset type (see comment above, lines 1636-1645).
   NOTE(review): the returns (false / true, original lines ~1661, 1688,
   1692-1693, 1720, 1727-1728) and some braces are missing from this
   extraction — verify against upstream tree-vect-stmts.cc.  */
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1649 loop_vec_info loop_vinfo
, bool masked_p
,
1650 gather_scatter_info
*gs_info
)
1652 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1653 data_reference
*dr
= dr_info
->dr
;
1654 tree step
= DR_STEP (dr
);
1655 if (TREE_CODE (step
) != INTEGER_CST
)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE
, vect_location
,
1660 "cannot truncate variable step.\n");
1664 /* Get the number of bits in an element. */
1665 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1666 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1667 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1675 widest_int max_iters
;
1676 if (max_loop_iterations (loop
, &max_iters
)
1677 && max_iters
< count
)
1678 count
= max_iters
.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1682 wi::overflow_type overflow
= wi::OVF_NONE
;
1683 for (int i
= 0; i
< 2; ++i
)
1685 int scale
= scales
[i
];
/* STEP must be an exact multiple of SCALE for this scale to work.  */
1687 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1694 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1695 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1699 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1706 vectype
, memory_type
, offset_type
, scale
,
1707 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1708 || gs_info
->ifn
== IFN_LAST
)
/* Success: fill in GS_INFO for this scale.  */
1711 gs_info
->decl
= NULL_TREE
;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info
->base
= NULL_TREE
;
1715 gs_info
->element_type
= TREE_TYPE (vectype
);
1716 gs_info
->offset
= fold_convert (offset_type
, step
);
1717 gs_info
->offset_dt
= vect_constant_def
;
1718 gs_info
->scale
= scale
;
1719 gs_info
->memory_type
= memory_type
;
/* Neither scale worked.  */
1723 if (overflow
&& dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE
, vect_location
,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits
);
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
/* See whether STMT_INFO (grouped/strided access) can use gather/scatter
   IFNs, filling GS_INFO (see comment above, lines 1731-1734).
   NOTE(review): widening of the offset (original lines ~1749) and the
   final "return true;" are missing from this extraction.  */
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1739 loop_vec_info loop_vinfo
, bool masked_p
,
1740 gather_scatter_info
*gs_info
)
/* No direct gather/scatter support: fall back to truncating the
   offset to a narrower type.  */
1742 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1743 || gs_info
->ifn
== IFN_LAST
)
1744 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1747 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1748 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1750 gcc_assert (TYPE_PRECISION (new_offset_type
)
1751 >= TYPE_PRECISION (old_offset_type
));
1752 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE
, vect_location
,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info
->scale
);
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
/* Sign of the DR step: -1, 0 or 1 (see comment above, lines 1762-1764).
   NOTE(review): the second argument of tree_int_cst_compare (original
   line 1771, presumably ssize_int (0)) was lost in extraction.  */
1767 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
1769 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
/* Build the element-reversing permute mask for VECTYPE, or fail if the
   target cannot do it (see comment above, lines 1774-1775).
   NOTE(review): the failure return (original lines ~1789-1790) is
   missing from this extraction.  */
1778 perm_mask_for_reverse (tree vectype
)
1780 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1782 /* The encoding has a single stepped pattern. */
/* {nunits-1, nunits-2, nunits-3} encodes the descending series for
   all elements.  */
1783 vec_perm_builder
sel (nunits
, 1, 3);
1784 for (int i
= 0; i
< 3; ++i
)
1785 sel
.quick_push (nunits
- 1 - i
);
1787 vec_perm_indices
indices (sel
, 1, nunits
);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
1791 return vect_gen_perm_mask_checked (vectype
, indices
);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
/* Classify a consecutive negative-step access and set *POFFSET (see
   comment above, lines 1794-1797).
   NOTE(review): the "ncopies > 1" guard (original lines ~1808-1809)
   before the first dump is missing from this extraction.  */
1800 get_negative_load_store_type (vec_info
*vinfo
,
1801 stmt_vec_info stmt_info
, tree vectype
,
1802 vec_load_store_type vls_type
,
1803 unsigned int ncopies
, poly_int64
*poffset
)
1805 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1806 dr_alignment_support alignment_support_scheme
;
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE
;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
/* Unsupported misalignment at the adjusted offset forces the
   element-by-element fallback.  */
1821 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
1824 if (alignment_support_scheme
!= dr_aligned
1825 && alignment_support_scheme
!= dr_unaligned_supported
)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1829 "negative step but alignment required.\n");
1831 return VMAT_ELEMENTWISE
;
1834 if (vls_type
== VLS_STORE_INVARIANT
)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE
, vect_location
,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN
;
/* Reversal needs a target-supported reverse permute.  */
1843 if (!perm_mask_for_reverse (vectype
))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1847 "negative step and reversing not supported.\n");
1849 return VMAT_ELEMENTWISE
;
1852 return VMAT_CONTIGUOUS_REVERSE
;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
/* Return the value stored by STMT_INFO, whether it is a plain
   single-assignment or an internal-fn store call (the stored-value
   operand index comes from internal_fn_stored_value_index).
   NOTE(review): a trailing gcc_unreachable () (original ~line 1873)
   appears to be missing from this extraction.  */
1859 vect_get_store_rhs (stmt_vec_info stmt_info
)
1861 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1863 gcc_assert (gimple_assign_single_p (assign
));
1864 return gimple_assign_rhs1 (assign
);
1866 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1868 internal_fn ifn
= gimple_call_internal_fn (call
);
1869 int index
= internal_fn_stored_value_index (ifn
);
1870 gcc_assert (index
>= 0);
1871 return gimple_call_arg (call
, index
);
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed with NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
1880 same vector size as the return vector. It first checks whether the target
1881 supports a pieces-size vector mode for construction; if the target does not,
1882 it then checks a pieces-size scalar mode for construction. It returns
1883 NULL_TREE if it fails to find an available composition.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
/* Find a vector type of the same size as VTYPE composable from NELTS
   pieces, recording the piece type in *PTYPE (see comment above,
   lines 1876-1888).  NOTE(review): some NULL_TREE returns and the
   rmode/elmode declarations (original lines ~1898, 1916, ~1936-1937)
   are missing from this extraction.  */
1891 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
1893 gcc_assert (VECTOR_TYPE_P (vtype
));
1894 gcc_assert (known_gt (nelts
, 0U));
1896 machine_mode vmode
= TYPE_MODE (vtype
);
1897 if (!VECTOR_MODE_P (vmode
))
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype
), nelts
))
1904 *ptype
= TREE_TYPE (vtype
);
/* PBSIZE is the bit-size of one of the NELTS pieces.  */
1908 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
1909 unsigned int pbsize
;
1910 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
1915 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
1917 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
1918 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
1919 != CODE_FOR_nothing
))
1921 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
1925 /* Otherwise check if exists an integer type of the same piece size and
1926 if vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
1928 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
1929 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
1930 != CODE_FOR_nothing
))
1932 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
1933 return build_vector_type (*ptype
, nelts
);
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1950 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
1951 tree vectype
, slp_tree slp_node
,
1952 bool masked_p
, vec_load_store_type vls_type
,
1953 vect_memory_access_type
*memory_access_type
,
1954 poly_int64
*poffset
,
1955 dr_alignment_support
*alignment_support_scheme
,
1957 gather_scatter_info
*gs_info
,
1958 internal_fn
*lanes_ifn
)
1960 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1961 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1962 stmt_vec_info first_stmt_info
;
1963 unsigned int group_size
;
1964 unsigned HOST_WIDE_INT gap
;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1967 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1968 group_size
= DR_GROUP_SIZE (first_stmt_info
);
1969 gap
= DR_GROUP_GAP (first_stmt_info
);
1973 first_stmt_info
= stmt_info
;
1977 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
1978 bool single_element_p
= (stmt_info
== first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
1980 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p
= false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p
= (!masked_p
1989 && vls_type
== VLS_LOAD
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits
, group_size
))
2013 *memory_access_type
= VMAT_STRIDED_SLP
;
2015 *memory_access_type
= VMAT_ELEMENTWISE
;
2019 overrun_p
= loop_vinfo
&& gap
!= 0;
2020 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2032 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2034 / vect_get_scalar_dr_size (first_dr_info
)))
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss
;
2042 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2046 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2047 vectype
, misalign
)))
2049 || alss
== dr_unaligned_supported
)
2050 && known_eq (nunits
, (group_size
- gap
) * 2)
2051 && known_eq (nunits
, group_size
)
2052 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2056 if (overrun_p
&& !can_overrun_p
)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2060 "Peeling for outer loop is not supported\n");
2063 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2066 if (single_element_p
)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type
= get_negative_load_store_type
2070 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits
, group_size
))
2077 *memory_access_type
= VMAT_STRIDED_SLP
;
2079 *memory_access_type
= VMAT_ELEMENTWISE
;
2082 else if (cmp
== 0 && loop_vinfo
)
2084 gcc_assert (vls_type
== VLS_LOAD
);
2085 *memory_access_type
= VMAT_INVARIANT
;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme
= dr_unaligned_supported
;
2091 *memory_access_type
= VMAT_CONTIGUOUS
;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2102 && *memory_access_type
== VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2104 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2107 unsigned HOST_WIDE_INT cnunits
, cvf
;
2109 || !nunits
.is_constant (&cnunits
)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size
* cvf
) % cnunits
+ group_size
- gap
< cnunits
)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2120 "peeling for gaps insufficient for "
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type
= VMAT_ELEMENTWISE
;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p
= (gap
!= 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2143 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2144 / vect_get_scalar_dr_size (first_dr_info
)))
2145 would_overrun_p
= false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2148 && (can_overrun_p
|| !would_overrun_p
)
2149 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2151 /* First cope with the degenerate case of a single-element
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2158 /* Otherwise try using LOAD/STORE_LANES. */
2160 = vls_type
== VLS_LOAD
2161 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2162 : vect_store_lanes_supported (vectype
, group_size
,
2164 if (*lanes_ifn
!= IFN_LAST
)
2166 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2167 overrun_p
= would_overrun_p
;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type
== VLS_LOAD
2172 ? vect_grouped_load_supported (vectype
,
2175 : vect_grouped_store_supported (vectype
, group_size
))
2177 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2178 overrun_p
= would_overrun_p
;
2183 /* As a last resort, trying using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type
== VMAT_ELEMENTWISE
2193 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2195 *memory_access_type
= VMAT_GATHER_SCATTER
;
2198 if (*memory_access_type
== VMAT_GATHER_SCATTER
2199 || *memory_access_type
== VMAT_ELEMENTWISE
)
2201 *alignment_support_scheme
= dr_unaligned_supported
;
2202 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2206 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2212 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2217 while (next_stmt_info
)
2219 tree op
= vect_get_store_rhs (next_stmt_info
);
2220 enum vect_def_type dt
;
2221 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2225 "use not simple.\n");
2228 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2234 gcc_assert (can_overrun_p
);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2237 "Data access with gaps requires scalar "
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
2260 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2261 tree vectype
, slp_tree slp_node
,
2262 bool masked_p
, vec_load_store_type vls_type
,
2263 unsigned int ncopies
,
2264 vect_memory_access_type
*memory_access_type
,
2265 poly_int64
*poffset
,
2266 dr_alignment_support
*alignment_support_scheme
,
2268 gather_scatter_info
*gs_info
,
2269 internal_fn
*lanes_ifn
)
2271 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2272 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2273 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2277 *memory_access_type
= VMAT_GATHER_SCATTER
;
2278 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info
->ifn
!= IFN_LAST
2287 && !is_gimple_call (stmt_info
->stmt
)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2289 TREE_TYPE (gs_info
->offset_vectype
)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2293 "%s offset requires a conversion\n",
2294 vls_type
== VLS_LOAD
? "gather" : "scatter");
2297 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2298 &gs_info
->offset_dt
,
2299 &gs_info
->offset_vectype
))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2303 "%s index use not simple.\n",
2304 vls_type
== VLS_LOAD
? "gather" : "scatter");
2307 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info
->offset_vectype
),
2313 TYPE_VECTOR_SUBPARTS (vectype
)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2317 "unsupported vector types for emulated "
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme
= dr_unaligned_supported
;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) || slp_node
)
2328 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2330 vls_type
, memory_access_type
, poffset
,
2331 alignment_support_scheme
,
2332 misalignment
, gs_info
, lanes_ifn
))
2335 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2337 gcc_assert (!slp_node
);
2339 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2341 *memory_access_type
= VMAT_GATHER_SCATTER
;
2343 *memory_access_type
= VMAT_ELEMENTWISE
;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme
= dr_unaligned_supported
;
2349 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2352 gcc_assert (vls_type
== VLS_LOAD
);
2353 *memory_access_type
= VMAT_INVARIANT
;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme
= dr_unaligned_supported
;
2361 *memory_access_type
= get_negative_load_store_type
2362 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2364 *memory_access_type
= VMAT_CONTIGUOUS
;
2365 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo
,
2369 STMT_VINFO_DR_INFO (stmt_info
),
2370 vectype
, *misalignment
);
2374 if ((*memory_access_type
== VMAT_ELEMENTWISE
2375 || *memory_access_type
== VMAT_STRIDED_SLP
)
2376 && !nunits
.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2385 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2389 "unsupported unaligned access\n");
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2397 if (!first_stmt_info
)
2398 first_stmt_info
= stmt_info
;
2399 if (*memory_access_type
== VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2401 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2407 "not falling back to elementwise accesses\n");
2413 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2420 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2421 slp_tree slp_node
, unsigned mask_index
,
2422 tree
*mask
, slp_tree
*mask_node
,
2423 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2425 enum vect_def_type mask_dt
;
2427 slp_tree mask_node_1
;
2428 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2429 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2433 "mask use not simple.\n");
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2441 "mask argument is not a boolean.\n");
2445 /* If the caller is not prepared for adjusting an external/constant
2446 SLP mask vector type fail. */
2449 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2453 "SLP mask argument is not vectorized.\n");
2457 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2459 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
),
2462 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2466 "could not find an appropriate vector mask type.\n");
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2471 TYPE_VECTOR_SUBPARTS (vectype
)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype
, vectype
);
2482 *mask_dt_out
= mask_dt
;
2483 *mask_vectype_out
= mask_vectype
;
2485 *mask_node
= mask_node_1
;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2496 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2497 slp_tree slp_node
, tree
*rhs
, slp_tree
*rhs_node
,
2498 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2499 vec_load_store_type
*vls_type_out
)
2502 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2504 if (gimple_call_internal_p (call
)
2505 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2506 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2509 op_no
= vect_slp_child_index_for_operand
2510 (stmt_info
->stmt
, op_no
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
2512 enum vect_def_type rhs_dt
;
2514 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2515 rhs
, rhs_node
, &rhs_dt
, &rhs_vectype
))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "use not simple.\n");
2523 /* In the case this is a store from a constant make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs
) && native_encode_expr (*rhs
, NULL
, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2529 "cannot encode constant as a byte sequence.\n");
2533 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2534 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2538 "incompatible vector types.\n");
2542 *rhs_dt_out
= rhs_dt
;
2543 *rhs_vectype_out
= rhs_vectype
;
2544 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2545 *vls_type_out
= VLS_STORE_INVARIANT
;
2547 *vls_type_out
= VLS_STORE
;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2556 vect_build_all_ones_mask (vec_info
*vinfo
,
2557 stmt_vec_info stmt_info
, tree masktype
)
2559 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2560 return build_int_cst (masktype
, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype
)
2562 || TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2564 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2565 mask
= build_vector_from_val (masktype
, mask
);
2566 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2572 for (int j
= 0; j
< 6; ++j
)
2574 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2575 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2576 mask
= build_vector_from_val (masktype
, mask
);
2577 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2586 vect_build_zero_merge_argument (vec_info
*vinfo
,
2587 stmt_vec_info stmt_info
, tree vectype
)
2590 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2591 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2596 for (int j
= 0; j
< 6; ++j
)
2598 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2599 merge
= build_real (TREE_TYPE (vectype
), r
);
2603 merge
= build_vector_from_val (vectype
, merge
);
2604 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
2614 vect_build_one_gather_load_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2615 gimple_stmt_iterator
*gsi
,
2616 gather_scatter_info
*gs_info
,
2617 tree ptr
, tree offset
, tree mask
)
2619 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2620 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2621 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2622 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2623 /* ptrtype */ arglist
= TREE_CHAIN (arglist
);
2624 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2625 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2626 tree scaletype
= TREE_VALUE (arglist
);
2628 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2630 || TREE_CODE (masktype
) == INTEGER_TYPE
2631 || types_compatible_p (srctype
, masktype
)));
2634 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2637 TYPE_VECTOR_SUBPARTS (idxtype
)));
2638 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2639 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2640 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2641 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2645 tree src_op
= NULL_TREE
;
2646 tree mask_op
= NULL_TREE
;
2649 if (!useless_type_conversion_p (masktype
, TREE_TYPE (mask
)))
2651 tree utype
, optype
= TREE_TYPE (mask
);
2652 if (VECTOR_TYPE_P (masktype
)
2653 || TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2656 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2657 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2658 tree mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask
);
2660 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2661 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2663 if (!useless_type_conversion_p (masktype
, utype
))
2665 gcc_assert (TYPE_PRECISION (utype
)
2666 <= TYPE_PRECISION (masktype
));
2667 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2668 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2669 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2672 src_op
= build_zero_cst (srctype
);
2683 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2684 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2687 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2688 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2691 if (!useless_type_conversion_p (vectype
, rettype
))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2694 TYPE_VECTOR_SUBPARTS (rettype
)));
2695 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2696 gimple_call_set_lhs (new_stmt
, op
);
2697 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2698 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2699 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
, op
);
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI. GS_INFO describes the scatter store operation.
2707 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2708 vectorized data to store.
2709 If the store is conditional, MASK is the vectorized condition, otherwise
2713 vect_build_one_scatter_store_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2714 gimple_stmt_iterator
*gsi
,
2715 gather_scatter_info
*gs_info
,
2716 tree ptr
, tree offset
, tree oprnd
, tree mask
)
2718 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2719 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2720 /* tree ptrtype = TREE_VALUE (arglist); */ arglist
= TREE_CHAIN (arglist
);
2721 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2722 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2723 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2724 tree scaletype
= TREE_VALUE (arglist
);
2725 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
2726 && TREE_CODE (rettype
) == VOID_TYPE
);
2728 tree mask_arg
= NULL_TREE
;
2732 tree optype
= TREE_TYPE (mask_arg
);
2734 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2737 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2738 tree var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2739 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
2741 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2742 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2744 if (!useless_type_conversion_p (masktype
, utype
))
2746 gcc_assert (TYPE_PRECISION (utype
) <= TYPE_PRECISION (masktype
));
2747 tree var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2748 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2755 mask_arg
= build_int_cst (masktype
, -1);
2756 mask_arg
= vect_init_vector (vinfo
, stmt_info
, mask_arg
, masktype
, NULL
);
2760 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
2762 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
2763 TYPE_VECTOR_SUBPARTS (srctype
)));
2764 tree var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
2765 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
2766 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
2767 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2772 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2774 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2775 TYPE_VECTOR_SUBPARTS (idxtype
)));
2776 tree var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2777 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2778 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2779 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2783 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2785 = gimple_build_call (gs_info
->decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
2789 /* Prepare the base and offset in GS_INFO for vectorization.
2790 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2791 to the vectorized offset argument for the first copy of STMT_INFO.
2792 STMT_INFO is the statement described by GS_INFO and LOOP is the
2796 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2797 class loop
*loop
, stmt_vec_info stmt_info
,
2798 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2799 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
2801 gimple_seq stmts
= NULL
;
2802 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2806 edge pe
= loop_preheader_edge (loop
);
2807 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2808 gcc_assert (!new_bb
);
2811 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
2815 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2816 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2817 gs_info
->offset
, vec_offset
,
2818 gs_info
->offset_vectype
);
2822 /* Prepare to implement a grouped or strided load or store using
2823 the gather load or scatter store operation described by GS_INFO.
2824 STMT_INFO is the load or store statement.
2826 Set *DATAREF_BUMP to the amount that should be added to the base
2827 address after each copy of the vectorized statement. Set *VEC_OFFSET
2828 to an invariant offset vector in which element I has the value
2829 I * DR_STEP / SCALE. */
2832 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2833 loop_vec_info loop_vinfo
,
2834 gimple_stmt_iterator
*gsi
,
2835 gather_scatter_info
*gs_info
,
2836 tree
*dataref_bump
, tree
*vec_offset
,
2837 vec_loop_lens
*loop_lens
)
2839 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2840 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2842 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2844 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2845 ivtmp_8 = _31 * 16 (step in bytes);
2846 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2847 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2849 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, vectype
, 0, 0);
2851 = fold_build2 (MULT_EXPR
, sizetype
,
2852 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2854 *dataref_bump
= force_gimple_operand_gsi (gsi
, tmp
, true, NULL_TREE
, true,
2860 = size_binop (MULT_EXPR
,
2861 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2862 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2863 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
2866 /* The offset given in GS_INFO can have pointer type, so use the element
2867 type of the vector instead. */
2868 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2870 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2871 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2872 ssize_int (gs_info
->scale
));
2873 step
= fold_convert (offset_type
, step
);
2875 /* Create {0, X, X*2, X*3, ...}. */
2876 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2877 build_zero_cst (offset_type
), step
);
2878 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
2881 /* Prepare the pointer IVs which needs to be updated by a variable amount.
2882 Such variable amount is the outcome of .SELECT_VL. In this case, we can
2883 allow each iteration process the flexible number of elements as long as
2884 the number <= vf elments.
2886 Return data reference according to SELECT_VL.
2887 If new statements are needed, insert them before GSI. */
2890 vect_get_loop_variant_data_ptr_increment (
2891 vec_info
*vinfo
, tree aggr_type
, gimple_stmt_iterator
*gsi
,
2892 vec_loop_lens
*loop_lens
, dr_vec_info
*dr_info
,
2893 vect_memory_access_type memory_access_type
)
2895 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2896 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
2898 /* gather/scatter never reach here. */
2899 gcc_assert (memory_access_type
!= VMAT_GATHER_SCATTER
);
2901 /* When we support SELECT_VL pattern, we dynamic adjust
2902 the memory address by .SELECT_VL result.
2904 The result of .SELECT_VL is the number of elements to
2905 be processed of each iteration. So the memory address
2906 adjustment operation should be:
2908 addr = addr + .SELECT_VL (ARG..) * step;
2911 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, aggr_type
, 0, 0);
2912 tree len_type
= TREE_TYPE (loop_len
);
2913 /* Since the outcome of .SELECT_VL is element size, we should adjust
2914 it into bytesize so that it can be used in address pointer variable
2915 amount IVs adjustment. */
2916 tree tmp
= fold_build2 (MULT_EXPR
, len_type
, loop_len
,
2917 wide_int_to_tree (len_type
, wi::to_widest (step
)));
2918 tree bump
= make_temp_ssa_name (len_type
, NULL
, "ivtmp");
2919 gassign
*assign
= gimple_build_assign (bump
, tmp
);
2920 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
2924 /* Return the amount that should be added to a vector pointer to move
2925 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2926 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2930 vect_get_data_ptr_increment (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
2931 dr_vec_info
*dr_info
, tree aggr_type
,
2932 vect_memory_access_type memory_access_type
,
2933 vec_loop_lens
*loop_lens
= nullptr)
2935 if (memory_access_type
== VMAT_INVARIANT
)
2936 return size_zero_node
;
2938 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2939 if (loop_vinfo
&& LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2940 return vect_get_loop_variant_data_ptr_increment (vinfo
, aggr_type
, gsi
,
2942 memory_access_type
);
2944 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2945 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
2946 if (tree_int_cst_sgn (step
) == -1)
2947 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2951 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2954 vectorizable_bswap (vec_info
*vinfo
,
2955 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2956 gimple
**vec_stmt
, slp_tree slp_node
,
2958 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2961 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2962 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2965 op
= gimple_call_arg (stmt
, 0);
2966 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2967 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2969 /* Multiple types in SLP are handled by creating the appropriate number of
2970 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2975 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2977 gcc_assert (ncopies
>= 1);
2979 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2983 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2984 unsigned word_bytes
;
2985 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2988 /* The encoding uses one stepped pattern for each byte in the word. */
2989 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2990 for (unsigned i
= 0; i
< 3; ++i
)
2991 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2992 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2994 vec_perm_indices
indices (elts
, 1, num_bytes
);
2995 machine_mode vmode
= TYPE_MODE (char_vectype
);
2996 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3002 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3004 if (dump_enabled_p ())
3005 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3006 "incompatible vector types for invariants\n");
3010 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3011 DUMP_VECT_SCOPE ("vectorizable_bswap");
3012 record_stmt_cost (cost_vec
,
3013 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3014 record_stmt_cost (cost_vec
,
3016 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3017 vec_perm
, stmt_info
, 0, vect_body
);
3021 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3024 vec
<tree
> vec_oprnds
= vNULL
;
3025 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3027 /* Arguments are ready. create the new vector stmt. */
3030 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3033 tree tem
= make_ssa_name (char_vectype
);
3034 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3035 char_vectype
, vop
));
3036 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3037 tree tem2
= make_ssa_name (char_vectype
);
3038 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3039 tem
, tem
, bswap_vconst
);
3040 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3041 tem
= make_ssa_name (vectype
);
3042 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3044 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3046 slp_node
->push_vec_def (new_stmt
);
3048 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3052 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3054 vec_oprnds
.release ();
3058 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3059 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3060 in a single step. On success, store the binary pack code in
3064 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3065 code_helper
*convert_code
)
3067 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3068 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3072 int multi_step_cvt
= 0;
3073 auto_vec
<tree
, 8> interm_types
;
3074 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3075 &code
, &multi_step_cvt
, &interm_types
)
3079 *convert_code
= code
;
3083 /* Function vectorizable_call.
3085 Check if STMT_INFO performs a function call that can be vectorized.
3086 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3087 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3088 Return true if STMT_INFO is vectorizable in this way. */
3091 vectorizable_call (vec_info
*vinfo
,
3092 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3093 gimple
**vec_stmt
, slp_tree slp_node
,
3094 stmt_vector_for_cost
*cost_vec
)
3100 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3101 tree vectype_out
, vectype_in
;
3102 poly_uint64 nunits_in
;
3103 poly_uint64 nunits_out
;
3104 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3105 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3106 tree fndecl
, new_temp
, rhs_type
;
3107 enum vect_def_type dt
[4]
3108 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3109 vect_unknown_def_type
};
3110 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3111 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3112 int ndts
= ARRAY_SIZE (dt
);
3114 auto_vec
<tree
, 8> vargs
;
3115 enum { NARROW
, NONE
, WIDEN
} modifier
;
3118 tree clz_ctz_arg1
= NULL_TREE
;
3120 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3123 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3127 /* Is STMT_INFO a vectorizable call? */
3128 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3132 if (gimple_call_internal_p (stmt
)
3133 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3134 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3135 /* Handled by vectorizable_load and vectorizable_store. */
3138 if (gimple_call_lhs (stmt
) == NULL_TREE
3139 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3142 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3144 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3146 /* Process function arguments. */
3147 rhs_type
= NULL_TREE
;
3148 vectype_in
= NULL_TREE
;
3149 nargs
= gimple_call_num_args (stmt
);
3151 /* Bail out if the function has more than four arguments, we do not have
3152 interesting builtin functions to vectorize with more than two arguments
3153 except for fma. No arguments is also not good. */
3154 if (nargs
== 0 || nargs
> 4)
3157 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3158 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3159 if (cfn
== CFN_GOMP_SIMD_LANE
)
3162 rhs_type
= unsigned_type_node
;
3164 /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
3165 argument just says whether it is well-defined at zero or not and what
3166 value should be returned for it. */
3167 if ((cfn
== CFN_CLZ
|| cfn
== CFN_CTZ
) && nargs
== 2)
3170 clz_ctz_arg1
= gimple_call_arg (stmt
, 1);
3174 if (internal_fn_p (cfn
))
3175 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3177 for (i
= 0; i
< nargs
; i
++)
3179 if ((int) i
== mask_opno
)
3181 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3182 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3187 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3188 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3192 "use not simple.\n");
3196 /* We can only handle calls with arguments of the same type. */
3198 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3202 "argument types differ.\n");
3206 rhs_type
= TREE_TYPE (op
);
3209 vectype_in
= vectypes
[i
];
3210 else if (vectypes
[i
]
3211 && !types_compatible_p (vectypes
[i
], vectype_in
))
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3215 "argument vector types differ.\n");
3219 /* If all arguments are external or constant defs, infer the vector type
3220 from the scalar type. */
3222 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3224 gcc_assert (vectype_in
);
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3229 "no vectype for scalar type %T\n", rhs_type
);
3234 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3235 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3237 if (dump_enabled_p ())
3238 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3239 "mixed mask and nonmask vector types\n");
3243 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3247 "use emulated vector type for call\n");
3252 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3253 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3254 if (known_eq (nunits_in
* 2, nunits_out
))
3256 else if (known_eq (nunits_out
, nunits_in
))
3258 else if (known_eq (nunits_out
* 2, nunits_in
))
3263 /* We only handle functions that do not read or clobber memory. */
3264 if (gimple_vuse (stmt
))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3268 "function reads from or writes to memory.\n");
3272 /* For now, we only vectorize functions if a target specific builtin
3273 is available. TODO -- in some cases, it might be profitable to
3274 insert the calls for pieces of the vector, in order to be able
3275 to vectorize other operations in the loop. */
3277 internal_fn ifn
= IFN_LAST
;
3278 tree callee
= gimple_call_fndecl (stmt
);
3280 /* First try using an internal function. */
3281 code_helper convert_code
= MAX_TREE_CODES
;
3283 && (modifier
== NONE
3284 || (modifier
== NARROW
3285 && simple_integer_narrowing (vectype_out
, vectype_in
,
3287 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3290 /* If that fails, try asking for a target-specific built-in function. */
3291 if (ifn
== IFN_LAST
)
3293 if (cfn
!= CFN_LAST
)
3294 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3295 (cfn
, vectype_out
, vectype_in
);
3296 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3297 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3298 (callee
, vectype_out
, vectype_in
);
3301 if (ifn
== IFN_LAST
&& !fndecl
)
3303 if (cfn
== CFN_GOMP_SIMD_LANE
3306 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3307 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3308 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3309 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3311 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3312 { 0, 1, 2, ... vf - 1 } vector. */
3313 gcc_assert (nargs
== 0);
3315 else if (modifier
== NONE
3316 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3317 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3318 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3319 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3320 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3321 slp_op
, vectype_in
, cost_vec
);
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3326 "function is not vectorizable.\n");
3333 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3334 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3336 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3338 /* Sanity check: make sure that at least one copy of the vectorized stmt
3339 needs to be generated. */
3340 gcc_assert (ncopies
>= 1);
3342 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3343 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3344 internal_fn cond_len_fn
= get_len_internal_fn (ifn
);
3345 int len_opno
= internal_fn_len_index (cond_len_fn
);
3346 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3347 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
3348 if (!vec_stmt
) /* transformation not required. */
3351 for (i
= 0; i
< nargs
; ++i
)
3352 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3354 ? vectypes
[i
] : vectype_in
))
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3358 "incompatible vector types for invariants\n");
3361 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3362 DUMP_VECT_SCOPE ("vectorizable_call");
3363 vect_model_simple_cost (vinfo
, stmt_info
,
3364 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3365 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3366 record_stmt_cost (cost_vec
, ncopies
/ 2,
3367 vec_promote_demote
, stmt_info
, 0, vect_body
);
3370 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3371 && (reduc_idx
>= 0 || mask_opno
>= 0))
3374 && (cond_fn
== IFN_LAST
3375 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3376 OPTIMIZE_FOR_SPEED
))
3377 && (cond_len_fn
== IFN_LAST
3378 || !direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3379 OPTIMIZE_FOR_SPEED
)))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3383 "can't use a fully-masked loop because no"
3384 " conditional operation is available.\n");
3385 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3389 unsigned int nvectors
3391 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3393 tree scalar_mask
= NULL_TREE
;
3395 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3396 if (cond_len_fn
!= IFN_LAST
3397 && direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3398 OPTIMIZE_FOR_SPEED
))
3399 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype_out
,
3402 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
,
3411 if (dump_enabled_p ())
3412 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3415 scalar_dest
= gimple_call_lhs (stmt
);
3416 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3418 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3419 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
3420 unsigned int vect_nargs
= nargs
;
3426 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3429 else if (reduc_idx
>= 0)
3432 else if (masked_loop_p
&& reduc_idx
>= 0)
3440 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3442 tree prev_res
= NULL_TREE
;
3443 vargs
.safe_grow (vect_nargs
, true);
3444 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3445 for (j
= 0; j
< ncopies
; ++j
)
3447 /* Build argument list for the vectorized call. */
3450 vec
<tree
> vec_oprnds0
;
3452 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3453 vec_oprnds0
= vec_defs
[0];
3455 /* Arguments are ready. Create the new vector stmt. */
3456 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3459 if (masked_loop_p
&& reduc_idx
>= 0)
3461 unsigned int vec_num
= vec_oprnds0
.length ();
3462 /* Always true for SLP. */
3463 gcc_assert (ncopies
== 1);
3464 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3465 gsi
, masks
, vec_num
,
3469 for (k
= 0; k
< nargs
; k
++)
3471 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3472 vargs
[varg
++] = vec_oprndsk
[i
];
3474 if (masked_loop_p
&& reduc_idx
>= 0)
3475 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3477 vargs
[varg
++] = clz_ctz_arg1
;
3480 if (modifier
== NARROW
)
3482 /* We don't define any narrowing conditional functions
3484 gcc_assert (mask_opno
< 0);
3485 tree half_res
= make_ssa_name (vectype_in
);
3487 = gimple_build_call_internal_vec (ifn
, vargs
);
3488 gimple_call_set_lhs (call
, half_res
);
3489 gimple_call_set_nothrow (call
, true);
3490 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3493 prev_res
= half_res
;
3496 new_temp
= make_ssa_name (vec_dest
);
3497 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3498 prev_res
, half_res
);
3499 vect_finish_stmt_generation (vinfo
, stmt_info
,
3504 if (len_opno
>= 0 && len_loop_p
)
3506 unsigned int vec_num
= vec_oprnds0
.length ();
3507 /* Always true for SLP. */
3508 gcc_assert (ncopies
== 1);
3510 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3513 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3514 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3515 vargs
[len_opno
] = len
;
3516 vargs
[len_opno
+ 1] = bias
;
3518 else if (mask_opno
>= 0 && masked_loop_p
)
3520 unsigned int vec_num
= vec_oprnds0
.length ();
3521 /* Always true for SLP. */
3522 gcc_assert (ncopies
== 1);
3523 tree mask
= vect_get_loop_mask (loop_vinfo
,
3524 gsi
, masks
, vec_num
,
3526 vargs
[mask_opno
] = prepare_vec_mask
3527 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3528 vargs
[mask_opno
], gsi
);
3532 if (ifn
!= IFN_LAST
)
3533 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3535 call
= gimple_build_call_vec (fndecl
, vargs
);
3536 new_temp
= make_ssa_name (vec_dest
, call
);
3537 gimple_call_set_lhs (call
, new_temp
);
3538 gimple_call_set_nothrow (call
, true);
3539 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3542 slp_node
->push_vec_def (new_stmt
);
3548 if (masked_loop_p
&& reduc_idx
>= 0)
3549 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3551 for (i
= 0; i
< nargs
; i
++)
3553 op
= gimple_call_arg (stmt
, i
);
3556 vec_defs
.quick_push (vNULL
);
3557 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3561 vargs
[varg
++] = vec_defs
[i
][j
];
3563 if (masked_loop_p
&& reduc_idx
>= 0)
3564 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3566 vargs
[varg
++] = clz_ctz_arg1
;
3568 if (len_opno
>= 0 && len_loop_p
)
3570 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3573 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3574 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3575 vargs
[len_opno
] = len
;
3576 vargs
[len_opno
+ 1] = bias
;
3578 else if (mask_opno
>= 0 && masked_loop_p
)
3580 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3583 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3584 vargs
[mask_opno
], gsi
);
3588 if (cfn
== CFN_GOMP_SIMD_LANE
)
3590 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3592 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3593 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3594 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3595 new_temp
= make_ssa_name (vec_dest
);
3596 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3597 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3599 else if (modifier
== NARROW
)
3601 /* We don't define any narrowing conditional functions at
3603 gcc_assert (mask_opno
< 0);
3604 tree half_res
= make_ssa_name (vectype_in
);
3605 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3606 gimple_call_set_lhs (call
, half_res
);
3607 gimple_call_set_nothrow (call
, true);
3608 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3611 prev_res
= half_res
;
3614 new_temp
= make_ssa_name (vec_dest
);
3615 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
3617 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3622 if (ifn
!= IFN_LAST
)
3623 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3625 call
= gimple_build_call_vec (fndecl
, vargs
);
3626 new_temp
= make_ssa_name (vec_dest
, call
);
3627 gimple_call_set_lhs (call
, new_temp
);
3628 gimple_call_set_nothrow (call
, true);
3629 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3633 if (j
== (modifier
== NARROW
? 1 : 0))
3634 *vec_stmt
= new_stmt
;
3635 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3637 for (i
= 0; i
< nargs
; i
++)
3639 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3640 vec_oprndsi
.release ();
3643 else if (modifier
== NARROW
)
3645 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3646 /* We don't define any narrowing conditional functions at present. */
3647 gcc_assert (mask_opno
< 0);
3648 for (j
= 0; j
< ncopies
; ++j
)
3650 /* Build argument list for the vectorized call. */
3652 vargs
.create (nargs
* 2);
3658 vec
<tree
> vec_oprnds0
;
3660 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3661 vec_oprnds0
= vec_defs
[0];
3663 /* Arguments are ready. Create the new vector stmt. */
3664 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3668 for (k
= 0; k
< nargs
; k
++)
3670 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3671 vargs
.quick_push (vec_oprndsk
[i
]);
3672 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3675 if (ifn
!= IFN_LAST
)
3676 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3678 call
= gimple_build_call_vec (fndecl
, vargs
);
3679 new_temp
= make_ssa_name (vec_dest
, call
);
3680 gimple_call_set_lhs (call
, new_temp
);
3681 gimple_call_set_nothrow (call
, true);
3682 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3683 slp_node
->push_vec_def (call
);
3688 for (i
= 0; i
< nargs
; i
++)
3690 op
= gimple_call_arg (stmt
, i
);
3693 vec_defs
.quick_push (vNULL
);
3694 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3695 op
, &vec_defs
[i
], vectypes
[i
]);
3697 vec_oprnd0
= vec_defs
[i
][2*j
];
3698 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3700 vargs
.quick_push (vec_oprnd0
);
3701 vargs
.quick_push (vec_oprnd1
);
3704 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3705 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3706 gimple_call_set_lhs (new_stmt
, new_temp
);
3707 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3709 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3713 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3715 for (i
= 0; i
< nargs
; i
++)
3717 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3718 vec_oprndsi
.release ();
3722 /* No current target implements this case. */
3727 /* The call in STMT might prevent it from being removed in dce.
3728 We however cannot remove it here, due to the way the ssa name
3729 it defines is mapped to the new definition. So just replace
3730 rhs of the statement with something harmless. */
3735 stmt_info
= vect_orig_stmt (stmt_info
);
3736 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3739 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3740 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3746 struct simd_call_arg_info
3750 HOST_WIDE_INT linear_step
;
3751 enum vect_def_type dt
;
3753 bool simd_lane_linear
;
3756 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3757 is linear within simd lane (but not within whole loop), note it in
3761 vect_simd_lane_linear (tree op
, class loop
*loop
,
3762 struct simd_call_arg_info
*arginfo
)
3764 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3766 if (!is_gimple_assign (def_stmt
)
3767 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3768 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3771 tree base
= gimple_assign_rhs1 (def_stmt
);
3772 HOST_WIDE_INT linear_step
= 0;
3773 tree v
= gimple_assign_rhs2 (def_stmt
);
3774 while (TREE_CODE (v
) == SSA_NAME
)
3777 def_stmt
= SSA_NAME_DEF_STMT (v
);
3778 if (is_gimple_assign (def_stmt
))
3779 switch (gimple_assign_rhs_code (def_stmt
))
3782 t
= gimple_assign_rhs2 (def_stmt
);
3783 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3785 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3786 v
= gimple_assign_rhs1 (def_stmt
);
3789 t
= gimple_assign_rhs2 (def_stmt
);
3790 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3792 linear_step
= tree_to_shwi (t
);
3793 v
= gimple_assign_rhs1 (def_stmt
);
3796 t
= gimple_assign_rhs1 (def_stmt
);
3797 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3798 || (TYPE_PRECISION (TREE_TYPE (v
))
3799 < TYPE_PRECISION (TREE_TYPE (t
))))
3808 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3810 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3811 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3816 arginfo
->linear_step
= linear_step
;
3818 arginfo
->simd_lane_linear
= true;
3824 /* Function vectorizable_simd_clone_call.
3826 Check if STMT_INFO performs a function call that can be vectorized
3827 by calling a simd clone of the function.
3828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3830 Return true if STMT_INFO is vectorizable in this way. */
3833 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3834 gimple_stmt_iterator
*gsi
,
3835 gimple
**vec_stmt
, slp_tree slp_node
,
3836 stmt_vector_for_cost
*)
3841 tree vec_oprnd0
= NULL_TREE
;
3844 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3845 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3846 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3847 tree fndecl
, new_temp
;
3849 auto_vec
<simd_call_arg_info
> arginfo
;
3850 vec
<tree
> vargs
= vNULL
;
3852 tree lhs
, rtype
, ratype
;
3853 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3854 int masked_call_offset
= 0;
3856 /* Is STMT a vectorizable call? */
3857 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3861 fndecl
= gimple_call_fndecl (stmt
);
3862 if (fndecl
== NULL_TREE
3863 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
3865 fndecl
= gimple_call_arg (stmt
, 0);
3866 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
3867 fndecl
= TREE_OPERAND (fndecl
, 0);
3868 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
3869 masked_call_offset
= 1;
3871 if (fndecl
== NULL_TREE
)
3874 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3875 if (node
== NULL
|| node
->simd_clones
== NULL
)
3878 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3881 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3885 if (gimple_call_lhs (stmt
)
3886 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3889 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3891 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3893 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3896 /* Process function arguments. */
3897 nargs
= gimple_call_num_args (stmt
) - masked_call_offset
;
3899 /* Bail out if the function has zero arguments. */
3903 vec
<tree
>& simd_clone_info
= (slp_node
? SLP_TREE_SIMD_CLONE_INFO (slp_node
)
3904 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info
));
3905 arginfo
.reserve (nargs
, true);
3906 auto_vec
<slp_tree
> slp_op
;
3907 slp_op
.safe_grow_cleared (nargs
);
3909 for (i
= 0; i
< nargs
; i
++)
3911 simd_call_arg_info thisarginfo
;
3914 thisarginfo
.linear_step
= 0;
3915 thisarginfo
.align
= 0;
3916 thisarginfo
.op
= NULL_TREE
;
3917 thisarginfo
.simd_lane_linear
= false;
3919 int op_no
= i
+ masked_call_offset
;
3921 op_no
= vect_slp_child_index_for_operand (stmt
, op_no
, false);
3922 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3923 op_no
, &op
, &slp_op
[i
],
3924 &thisarginfo
.dt
, &thisarginfo
.vectype
)
3925 || thisarginfo
.dt
== vect_uninitialized_def
)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3929 "use not simple.\n");
3933 if (thisarginfo
.dt
== vect_constant_def
3934 || thisarginfo
.dt
== vect_external_def
)
3936 /* With SLP we determine the vector type of constants/externals
3937 at analysis time, handling conflicts via
3938 vect_maybe_update_slp_op_vectype. At transform time
3939 we have a vector type recorded for SLP. */
3940 gcc_assert (!vec_stmt
3942 || thisarginfo
.vectype
!= NULL_TREE
);
3944 thisarginfo
.vectype
= get_vectype_for_scalar_type (vinfo
,
3949 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3951 /* For linear arguments, the analyze phase should have saved
3952 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3953 if (i
* 3 + 4 <= simd_clone_info
.length ()
3954 && simd_clone_info
[i
* 3 + 2])
3956 gcc_assert (vec_stmt
);
3957 thisarginfo
.linear_step
= tree_to_shwi (simd_clone_info
[i
* 3 + 2]);
3958 thisarginfo
.op
= simd_clone_info
[i
* 3 + 1];
3959 thisarginfo
.simd_lane_linear
3960 = (simd_clone_info
[i
* 3 + 3] == boolean_true_node
);
3961 /* If loop has been peeled for alignment, we need to adjust it. */
3962 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3963 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3964 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3966 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3967 tree step
= simd_clone_info
[i
* 3 + 2];
3968 tree opt
= TREE_TYPE (thisarginfo
.op
);
3969 bias
= fold_convert (TREE_TYPE (step
), bias
);
3970 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3972 = fold_build2 (POINTER_TYPE_P (opt
)
3973 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3974 thisarginfo
.op
, bias
);
3978 && thisarginfo
.dt
!= vect_constant_def
3979 && thisarginfo
.dt
!= vect_external_def
3981 && TREE_CODE (op
) == SSA_NAME
3982 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3984 && tree_fits_shwi_p (iv
.step
))
3986 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3987 thisarginfo
.op
= iv
.base
;
3989 else if ((thisarginfo
.dt
== vect_constant_def
3990 || thisarginfo
.dt
== vect_external_def
)
3991 && POINTER_TYPE_P (TREE_TYPE (op
)))
3992 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3993 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3995 if (POINTER_TYPE_P (TREE_TYPE (op
))
3996 && !thisarginfo
.linear_step
3998 && thisarginfo
.dt
!= vect_constant_def
3999 && thisarginfo
.dt
!= vect_external_def
4001 && TREE_CODE (op
) == SSA_NAME
)
4002 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4004 arginfo
.quick_push (thisarginfo
);
4007 poly_uint64 vf
= loop_vinfo
? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) : 1;
4008 unsigned group_size
= slp_node
? SLP_TREE_LANES (slp_node
) : 1;
4009 unsigned int badness
= 0;
4010 struct cgraph_node
*bestn
= NULL
;
4011 if (simd_clone_info
.exists ())
4012 bestn
= cgraph_node::get (simd_clone_info
[0]);
4014 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4015 n
= n
->simdclone
->next_clone
)
4017 unsigned int this_badness
= 0;
4018 unsigned int num_calls
;
4019 /* The number of arguments in the call and the number of parameters in
4020 the simdclone should match. However, when the simdclone is
4021 'inbranch', it could have one more paramater than nargs when using
4022 an inbranch simdclone to call a non-inbranch call, either in a
4023 non-masked loop using a all true constant mask, or inside a masked
4024 loop using it's mask. */
4025 size_t simd_nargs
= n
->simdclone
->nargs
;
4026 if (!masked_call_offset
&& n
->simdclone
->inbranch
)
4028 if (!constant_multiple_p (vf
* group_size
, n
->simdclone
->simdlen
,
4030 || (!n
->simdclone
->inbranch
&& (masked_call_offset
> 0))
4031 || (nargs
!= simd_nargs
))
4034 this_badness
+= exact_log2 (num_calls
) * 4096;
4035 if (n
->simdclone
->inbranch
)
4036 this_badness
+= 8192;
4037 int target_badness
= targetm
.simd_clone
.usable (n
);
4038 if (target_badness
< 0)
4040 this_badness
+= target_badness
* 512;
4041 for (i
= 0; i
< nargs
; i
++)
4043 switch (n
->simdclone
->args
[i
].arg_type
)
4045 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4046 if (!useless_type_conversion_p
4047 (n
->simdclone
->args
[i
].orig_type
,
4048 TREE_TYPE (gimple_call_arg (stmt
,
4049 i
+ masked_call_offset
))))
4051 else if (arginfo
[i
].dt
== vect_constant_def
4052 || arginfo
[i
].dt
== vect_external_def
4053 || arginfo
[i
].linear_step
)
4056 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4057 if (arginfo
[i
].dt
!= vect_constant_def
4058 && arginfo
[i
].dt
!= vect_external_def
)
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4063 if (arginfo
[i
].dt
== vect_constant_def
4064 || arginfo
[i
].dt
== vect_external_def
4065 || (arginfo
[i
].linear_step
4066 != n
->simdclone
->args
[i
].linear_step
))
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4073 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4074 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4078 case SIMD_CLONE_ARG_TYPE_MASK
:
4079 /* While we can create a traditional data vector from
4080 an incoming integer mode mask we have no good way to
4081 force generate an integer mode mask from a traditional
4082 boolean vector input. */
4083 if (SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4084 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4086 else if (!SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4087 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4088 this_badness
+= 2048;
4091 if (i
== (size_t) -1)
4093 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4098 if (arginfo
[i
].align
)
4099 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4100 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4102 if (i
== (size_t) -1)
4104 if (masked_call_offset
== 0
4105 && n
->simdclone
->inbranch
4106 && n
->simdclone
->nargs
> nargs
)
4108 gcc_assert (n
->simdclone
->args
[n
->simdclone
->nargs
- 1].arg_type
==
4109 SIMD_CLONE_ARG_TYPE_MASK
);
4110 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4111 not in a branch, as we'd have to construct an all-true mask. */
4112 if (!loop_vinfo
|| !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4115 if (bestn
== NULL
|| this_badness
< badness
)
4118 badness
= this_badness
;
4125 unsigned int num_mask_args
= 0;
4126 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4127 for (i
= 0; i
< nargs
; i
++)
4128 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4131 for (i
= 0; i
< nargs
; i
++)
4133 if ((arginfo
[i
].dt
== vect_constant_def
4134 || arginfo
[i
].dt
== vect_external_def
)
4135 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4137 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
,
4138 i
+ masked_call_offset
));
4139 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4141 if (arginfo
[i
].vectype
== NULL
4142 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4143 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4147 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4148 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4150 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4152 "vector mask arguments are not supported.\n");
4156 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4158 tree clone_arg_vectype
= bestn
->simdclone
->args
[i
].vector_type
;
4159 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4161 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype
),
4162 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4164 /* FORNOW we only have partial support for vector-type masks
4165 that can't hold all of simdlen. */
4166 if (dump_enabled_p ())
4167 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4169 "in-branch vector clones are not yet"
4170 " supported for mismatched vector sizes.\n");
4174 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4176 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
))
4177 || maybe_ne (exact_div (bestn
->simdclone
->simdlen
,
4179 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4181 /* FORNOW we only have partial support for integer-type masks
4182 that represent the same number of lanes as the
4183 vectorized mask inputs. */
4184 if (dump_enabled_p ())
4185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4187 "in-branch vector clones are not yet "
4188 "supported for mismatched vector sizes.\n");
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4197 "in-branch vector clones not supported"
4198 " on this target.\n");
4204 fndecl
= bestn
->decl
;
4205 nunits
= bestn
->simdclone
->simdlen
;
4207 ncopies
= vector_unroll_factor (vf
* group_size
, nunits
);
4209 ncopies
= vector_unroll_factor (vf
, nunits
);
4211 /* If the function isn't const, only allow it in simd loops where user
4212 has asserted that at least nunits consecutive iterations can be
4213 performed using SIMD instructions. */
4214 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4215 && gimple_vuse (stmt
))
4218 /* Sanity check: make sure that at least one copy of the vectorized stmt
4219 needs to be generated. */
4220 gcc_assert (ncopies
>= 1);
4222 if (!vec_stmt
) /* transformation not required. */
4225 for (unsigned i
= 0; i
< nargs
; ++i
)
4226 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], arginfo
[i
].vectype
))
4228 if (dump_enabled_p ())
4229 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4230 "incompatible vector types for invariants\n");
4233 /* When the original call is pure or const but the SIMD ABI dictates
4234 an aggregate return we will have to use a virtual definition and
4235 in a loop eventually even need to add a virtual PHI. That's
4236 not straight-forward so allow to fix this up via renaming. */
4237 if (gimple_call_lhs (stmt
)
4238 && !gimple_vdef (stmt
)
4239 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4240 vinfo
->any_known_not_updated_vssa
= true;
4241 /* ??? For SLP code-gen we end up inserting after the last
4242 vector argument def rather than at the original call position
4243 so automagic virtual operand updating doesn't work. */
4244 if (gimple_vuse (stmt
) && slp_node
)
4245 vinfo
->any_known_not_updated_vssa
= true;
4246 simd_clone_info
.safe_push (bestn
->decl
);
4247 for (i
= 0; i
< bestn
->simdclone
->nargs
; i
++)
4249 switch (bestn
->simdclone
->args
[i
].arg_type
)
4253 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4254 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4256 simd_clone_info
.safe_grow_cleared (i
* 3 + 1, true);
4257 simd_clone_info
.safe_push (arginfo
[i
].op
);
4258 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4259 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4260 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4261 simd_clone_info
.safe_push (ls
);
4262 tree sll
= arginfo
[i
].simd_lane_linear
4263 ? boolean_true_node
: boolean_false_node
;
4264 simd_clone_info
.safe_push (sll
);
4267 case SIMD_CLONE_ARG_TYPE_MASK
:
4269 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4270 vect_record_loop_mask (loop_vinfo
,
4271 &LOOP_VINFO_MASKS (loop_vinfo
),
4272 ncopies
, vectype
, op
);
4278 if (!bestn
->simdclone
->inbranch
&& loop_vinfo
)
4280 if (dump_enabled_p ()
4281 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4282 dump_printf_loc (MSG_NOTE
, vect_location
,
4283 "can't use a fully-masked loop because a"
4284 " non-masked simd clone was selected.\n");
4285 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
4288 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4289 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4290 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4291 dt, slp_node, cost_vec); */
4297 if (dump_enabled_p ())
4298 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4301 scalar_dest
= gimple_call_lhs (stmt
);
4302 vec_dest
= NULL_TREE
;
4307 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4308 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4309 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4312 rtype
= TREE_TYPE (ratype
);
4316 auto_vec
<vec
<tree
> > vec_oprnds
;
4317 auto_vec
<unsigned> vec_oprnds_i
;
4318 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4321 vec_oprnds
.reserve_exact (nargs
);
4322 vect_get_slp_defs (vinfo
, slp_node
, &vec_oprnds
);
4325 vec_oprnds
.safe_grow_cleared (nargs
, true);
4326 for (j
= 0; j
< ncopies
; ++j
)
4328 poly_uint64 callee_nelements
;
4329 poly_uint64 caller_nelements
;
4330 /* Build argument list for the vectorized call. */
4332 vargs
.create (nargs
);
4336 for (i
= 0; i
< nargs
; i
++)
4338 unsigned int k
, l
, m
, o
;
4340 op
= gimple_call_arg (stmt
, i
+ masked_call_offset
);
4341 switch (bestn
->simdclone
->args
[i
].arg_type
)
4343 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4344 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4345 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4346 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4347 o
= vector_unroll_factor (nunits
, callee_nelements
);
4348 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4350 if (known_lt (callee_nelements
, caller_nelements
))
4352 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4353 if (!constant_multiple_p (caller_nelements
,
4354 callee_nelements
, &k
))
4357 gcc_assert ((k
& (k
- 1)) == 0);
4361 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4362 ncopies
* o
/ k
, op
,
4364 vec_oprnds_i
[i
] = 0;
4365 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4369 vec_oprnd0
= arginfo
[i
].op
;
4370 if ((m
& (k
- 1)) == 0)
4371 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4373 arginfo
[i
].op
= vec_oprnd0
;
4375 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4377 bitsize_int ((m
& (k
- 1)) * prec
));
4379 = gimple_build_assign (make_ssa_name (atype
),
4381 vect_finish_stmt_generation (vinfo
, stmt_info
,
4383 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4387 if (!constant_multiple_p (callee_nelements
,
4388 caller_nelements
, &k
))
4390 gcc_assert ((k
& (k
- 1)) == 0);
4391 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4393 vec_alloc (ctor_elts
, k
);
4396 for (l
= 0; l
< k
; l
++)
4398 if (m
== 0 && l
== 0)
4401 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4405 vec_oprnds_i
[i
] = 0;
4406 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4409 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4410 arginfo
[i
].op
= vec_oprnd0
;
4413 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4417 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4420 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, atype
,
4423 = gimple_build_assign (make_ssa_name (atype
),
4425 vect_finish_stmt_generation (vinfo
, stmt_info
,
4427 vargs
.safe_push (gimple_get_lhs (new_stmt
));
4430 vargs
.safe_push (vec_oprnd0
);
4433 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4435 = gimple_build_assign (make_ssa_name (atype
),
4437 vect_finish_stmt_generation (vinfo
, stmt_info
,
4439 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4444 case SIMD_CLONE_ARG_TYPE_MASK
:
4445 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4447 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4448 tree elt_type
= TREE_TYPE (atype
);
4449 tree one
= fold_convert (elt_type
, integer_one_node
);
4450 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4451 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4452 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4453 o
= vector_unroll_factor (nunits
, callee_nelements
);
4454 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4456 if (maybe_lt (callee_nelements
, caller_nelements
))
4458 /* The mask type has fewer elements than simdlen. */
4463 else if (known_eq (callee_nelements
, caller_nelements
))
4465 /* The SIMD clone function has the same number of
4466 elements as the current function. */
4470 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4474 vec_oprnds_i
[i
] = 0;
4476 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4478 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4480 vec_loop_masks
*loop_masks
4481 = &LOOP_VINFO_MASKS (loop_vinfo
);
4483 = vect_get_loop_mask (loop_vinfo
, gsi
,
4484 loop_masks
, ncopies
,
4487 = prepare_vec_mask (loop_vinfo
,
4488 TREE_TYPE (loop_mask
),
4489 loop_mask
, vec_oprnd0
,
4491 loop_vinfo
->vec_cond_masked_set
.add ({ vec_oprnd0
,
4496 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4497 build_vector_from_val (atype
, one
),
4498 build_vector_from_val (atype
, zero
));
4500 = gimple_build_assign (make_ssa_name (atype
),
4502 vect_finish_stmt_generation (vinfo
, stmt_info
,
4504 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4508 /* The mask type has more elements than simdlen. */
4515 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4517 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4518 /* Guess the number of lanes represented by atype. */
4519 poly_uint64 atype_subparts
4520 = exact_div (bestn
->simdclone
->simdlen
,
4522 o
= vector_unroll_factor (nunits
, atype_subparts
);
4523 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4528 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4532 vec_oprnds_i
[i
] = 0;
4534 if (maybe_lt (atype_subparts
,
4535 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4537 /* The mask argument has fewer elements than the
4542 else if (known_eq (atype_subparts
,
4543 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4545 /* The vector mask argument matches the input
4546 in the number of lanes, but not necessarily
4548 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4549 tree st
= lang_hooks
.types
.type_for_mode
4550 (TYPE_MODE (TREE_TYPE (vec_oprnd0
)), 1);
4551 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, st
,
4554 = gimple_build_assign (make_ssa_name (st
),
4556 vect_finish_stmt_generation (vinfo
, stmt_info
,
4558 if (!types_compatible_p (atype
, st
))
4561 = gimple_build_assign (make_ssa_name (atype
),
4565 vect_finish_stmt_generation (vinfo
, stmt_info
,
4568 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4572 /* The mask argument has more elements than the
4582 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4583 vargs
.safe_push (op
);
4585 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4586 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4591 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4592 &stmts
, true, NULL_TREE
);
4596 edge pe
= loop_preheader_edge (loop
);
4597 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4598 gcc_assert (!new_bb
);
4600 if (arginfo
[i
].simd_lane_linear
)
4602 vargs
.safe_push (arginfo
[i
].op
);
4605 tree phi_res
= copy_ssa_name (op
);
4606 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4607 add_phi_arg (new_phi
, arginfo
[i
].op
,
4608 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4610 = POINTER_TYPE_P (TREE_TYPE (op
))
4611 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4612 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4613 ? sizetype
: TREE_TYPE (op
);
4615 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4617 tree tcst
= wide_int_to_tree (type
, cst
);
4618 tree phi_arg
= copy_ssa_name (op
);
4620 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4621 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4622 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4623 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4625 arginfo
[i
].op
= phi_res
;
4626 vargs
.safe_push (phi_res
);
4631 = POINTER_TYPE_P (TREE_TYPE (op
))
4632 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4633 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4634 ? sizetype
: TREE_TYPE (op
);
4636 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4638 tree tcst
= wide_int_to_tree (type
, cst
);
4639 new_temp
= make_ssa_name (TREE_TYPE (op
));
4641 = gimple_build_assign (new_temp
, code
,
4642 arginfo
[i
].op
, tcst
);
4643 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4644 vargs
.safe_push (new_temp
);
4647 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4648 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4649 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4650 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4651 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4652 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4658 if (masked_call_offset
== 0
4659 && bestn
->simdclone
->inbranch
4660 && bestn
->simdclone
->nargs
> nargs
)
4663 size_t mask_i
= bestn
->simdclone
->nargs
- 1;
4665 gcc_assert (bestn
->simdclone
->args
[mask_i
].arg_type
==
4666 SIMD_CLONE_ARG_TYPE_MASK
);
4668 tree masktype
= bestn
->simdclone
->args
[mask_i
].vector_type
;
4669 callee_nelements
= TYPE_VECTOR_SUBPARTS (masktype
);
4670 o
= vector_unroll_factor (nunits
, callee_nelements
);
4671 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4673 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4675 vec_loop_masks
*loop_masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
4676 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
4677 ncopies
, vectype
, j
);
4680 mask
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
4683 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4685 /* This means we are dealing with integer mask modes.
4686 First convert to an integer type with the same size as
4687 the current vector type. */
4688 unsigned HOST_WIDE_INT intermediate_size
4689 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask
)));
4691 build_nonstandard_integer_type (intermediate_size
, 1);
4692 mask
= build1 (VIEW_CONVERT_EXPR
, mid_int_type
, mask
);
4694 = gimple_build_assign (make_ssa_name (mid_int_type
),
4696 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4697 /* Then zero-extend to the mask mode. */
4698 mask
= fold_build1 (NOP_EXPR
, masktype
,
4699 gimple_get_lhs (new_stmt
));
4701 else if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4703 tree one
= fold_convert (TREE_TYPE (masktype
),
4705 tree zero
= fold_convert (TREE_TYPE (masktype
),
4707 mask
= build3 (VEC_COND_EXPR
, masktype
, mask
,
4708 build_vector_from_val (masktype
, one
),
4709 build_vector_from_val (masktype
, zero
));
4714 new_stmt
= gimple_build_assign (make_ssa_name (masktype
), mask
);
4715 vect_finish_stmt_generation (vinfo
, stmt_info
,
4717 mask
= gimple_assign_lhs (new_stmt
);
4718 vargs
.safe_push (mask
);
4722 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4726 || known_eq (TYPE_VECTOR_SUBPARTS (rtype
), nunits
));
4728 new_temp
= create_tmp_var (ratype
);
4729 else if (useless_type_conversion_p (vectype
, rtype
))
4730 new_temp
= make_ssa_name (vec_dest
, new_call
);
4732 new_temp
= make_ssa_name (rtype
, new_call
);
4733 gimple_call_set_lhs (new_call
, new_temp
);
4735 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4736 gimple
*new_stmt
= new_call
;
4740 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
4743 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4744 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4745 k
= vector_unroll_factor (nunits
,
4746 TYPE_VECTOR_SUBPARTS (vectype
));
4747 gcc_assert ((k
& (k
- 1)) == 0);
4748 for (l
= 0; l
< k
; l
++)
4753 t
= build_fold_addr_expr (new_temp
);
4754 t
= build2 (MEM_REF
, vectype
, t
,
4755 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4758 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4759 bitsize_int (prec
), bitsize_int (l
* prec
));
4760 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4761 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4763 if (j
== 0 && l
== 0)
4764 *vec_stmt
= new_stmt
;
4766 SLP_TREE_VEC_DEFS (slp_node
)
4767 .quick_push (gimple_assign_lhs (new_stmt
));
4769 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4773 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4776 else if (!multiple_p (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
4779 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype
),
4780 TYPE_VECTOR_SUBPARTS (rtype
), &k
))
4782 gcc_assert ((k
& (k
- 1)) == 0);
4783 if ((j
& (k
- 1)) == 0)
4784 vec_alloc (ret_ctor_elts
, k
);
4788 o
= vector_unroll_factor (nunits
,
4789 TYPE_VECTOR_SUBPARTS (rtype
));
4790 for (m
= 0; m
< o
; m
++)
4792 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4793 size_int (m
), NULL_TREE
, NULL_TREE
);
4794 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4796 vect_finish_stmt_generation (vinfo
, stmt_info
,
4798 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4799 gimple_assign_lhs (new_stmt
));
4801 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4804 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4805 if ((j
& (k
- 1)) != k
- 1)
4807 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4809 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4810 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4812 if ((unsigned) j
== k
- 1)
4813 *vec_stmt
= new_stmt
;
4815 SLP_TREE_VEC_DEFS (slp_node
)
4816 .quick_push (gimple_assign_lhs (new_stmt
));
4818 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4823 tree t
= build_fold_addr_expr (new_temp
);
4824 t
= build2 (MEM_REF
, vectype
, t
,
4825 build_int_cst (TREE_TYPE (t
), 0));
4826 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4827 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4828 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4830 else if (!useless_type_conversion_p (vectype
, rtype
))
4832 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4834 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4835 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4840 *vec_stmt
= new_stmt
;
4842 SLP_TREE_VEC_DEFS (slp_node
).quick_push (gimple_get_lhs (new_stmt
));
4844 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4847 for (i
= 0; i
< nargs
; ++i
)
4849 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4854 /* Mark the clone as no longer being a candidate for GC. */
4855 bestn
->gc_candidate
= false;
4857 /* The call in STMT might prevent it from being removed in dce.
4858 We however cannot remove it here, due to the way the ssa name
4859 it defines is mapped to the new definition. So just replace
4860 rhs of the statement with something harmless. */
4868 type
= TREE_TYPE (scalar_dest
);
4869 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4870 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4873 new_stmt
= gimple_build_nop ();
4874 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4875 unlink_stmt_vdef (stmt
);
4881 /* Function vect_gen_widened_results_half
4883 Create a vector stmt whose code, type, number of arguments, and result
4884 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4885 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4886 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4887 needs to be created (DECL is a function-decl of a target-builtin).
4888 STMT_INFO is the original scalar stmt that we are vectorizing. */
4891 vect_gen_widened_results_half (vec_info
*vinfo
, code_helper ch
,
4892 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4893 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4894 stmt_vec_info stmt_info
)
4899 /* Generate half of the widened result: */
4900 if (op_type
!= binary_op
)
4902 new_stmt
= vect_gimple_build (vec_dest
, ch
, vec_oprnd0
, vec_oprnd1
);
4903 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4904 gimple_set_lhs (new_stmt
, new_temp
);
4905 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4911 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4912 For multi-step conversions store the resulting vectors and call the function
4913 recursively. When NARROW_SRC_P is true, there's still a conversion after
4914 narrowing, don't store the vectors in the SLP_NODE or in vector info of
4915 the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
4918 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4920 stmt_vec_info stmt_info
,
4921 vec
<tree
> &vec_dsts
,
4922 gimple_stmt_iterator
*gsi
,
4923 slp_tree slp_node
, code_helper code
,
4927 tree vop0
, vop1
, new_tmp
, vec_dest
;
4929 vec_dest
= vec_dsts
.pop ();
4931 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4933 /* Create demotion operation. */
4934 vop0
= (*vec_oprnds
)[i
];
4935 vop1
= (*vec_oprnds
)[i
+ 1];
4936 gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
4937 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4938 gimple_set_lhs (new_stmt
, new_tmp
);
4939 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4940 if (multi_step_cvt
|| narrow_src_p
)
4941 /* Store the resulting vector for next recursive call,
4942 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4943 (*vec_oprnds
)[i
/2] = new_tmp
;
4946 /* This is the last step of the conversion sequence. Store the
4947 vectors in SLP_NODE or in vector info of the scalar statement
4948 (or in STMT_VINFO_RELATED_STMT chain). */
4950 slp_node
->push_vec_def (new_stmt
);
4952 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4956 /* For multi-step demotion operations we first generate demotion operations
4957 from the source type to the intermediate types, and then combine the
4958 results (stored in VEC_OPRNDS) in demotion operation to the destination
4962 /* At each level of recursion we have half of the operands we had at the
4964 vec_oprnds
->truncate ((i
+1)/2);
4965 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4967 stmt_info
, vec_dsts
, gsi
,
4968 slp_node
, VEC_PACK_TRUNC_EXPR
,
4972 vec_dsts
.quick_push (vec_dest
);
4976 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4977 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4978 STMT_INFO. For multi-step conversions store the resulting vectors and
4979 call the function recursively. */
4982 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4983 vec
<tree
> *vec_oprnds0
,
4984 vec
<tree
> *vec_oprnds1
,
4985 stmt_vec_info stmt_info
, tree vec_dest
,
4986 gimple_stmt_iterator
*gsi
,
4988 code_helper ch2
, int op_type
)
4991 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4992 gimple
*new_stmt1
, *new_stmt2
;
4993 vec
<tree
> vec_tmp
= vNULL
;
4995 vec_tmp
.create (vec_oprnds0
->length () * 2);
4996 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4998 if (op_type
== binary_op
)
4999 vop1
= (*vec_oprnds1
)[i
];
5003 /* Generate the two halves of promotion operation. */
5004 new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5005 op_type
, vec_dest
, gsi
,
5007 new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5008 op_type
, vec_dest
, gsi
,
5010 if (is_gimple_call (new_stmt1
))
5012 new_tmp1
= gimple_call_lhs (new_stmt1
);
5013 new_tmp2
= gimple_call_lhs (new_stmt2
);
5017 new_tmp1
= gimple_assign_lhs (new_stmt1
);
5018 new_tmp2
= gimple_assign_lhs (new_stmt2
);
5021 /* Store the results for the next step. */
5022 vec_tmp
.quick_push (new_tmp1
);
5023 vec_tmp
.quick_push (new_tmp2
);
5026 vec_oprnds0
->release ();
5027 *vec_oprnds0
= vec_tmp
;
5030 /* Create vectorized promotion stmts for widening stmts using only half the
5031 potential vector size for input. */
5033 vect_create_half_widening_stmts (vec_info
*vinfo
,
5034 vec
<tree
> *vec_oprnds0
,
5035 vec
<tree
> *vec_oprnds1
,
5036 stmt_vec_info stmt_info
, tree vec_dest
,
5037 gimple_stmt_iterator
*gsi
,
5046 vec
<tree
> vec_tmp
= vNULL
;
5048 vec_tmp
.create (vec_oprnds0
->length ());
5049 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5051 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
5053 gcc_assert (op_type
== binary_op
);
5054 vop1
= (*vec_oprnds1
)[i
];
5056 /* Widen the first vector input. */
5057 out_type
= TREE_TYPE (vec_dest
);
5058 new_tmp1
= make_ssa_name (out_type
);
5059 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5060 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
5061 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5063 /* Widen the second vector input. */
5064 new_tmp2
= make_ssa_name (out_type
);
5065 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5066 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5067 /* Perform the operation. With both vector inputs widened. */
5068 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5072 /* Perform the operation. With the single vector input widened. */
5073 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
5076 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5077 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5078 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5080 /* Store the results for the next step. */
5081 vec_tmp
.quick_push (new_tmp3
);
5084 vec_oprnds0
->release ();
5085 *vec_oprnds0
= vec_tmp
;
5089 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5090 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5091 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5092 Return true if STMT_INFO is vectorizable in this way. */
5095 vectorizable_conversion (vec_info
*vinfo
,
5096 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5097 gimple
**vec_stmt
, slp_tree slp_node
,
5098 stmt_vector_for_cost
*cost_vec
)
5100 tree vec_dest
, cvt_op
= NULL_TREE
;
5102 tree op0
, op1
= NULL_TREE
;
5103 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5105 code_helper code
, code1
, code2
;
5106 code_helper codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
5108 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5110 poly_uint64 nunits_in
;
5111 poly_uint64 nunits_out
;
5112 tree vectype_out
, vectype_in
;
5114 tree lhs_type
, rhs_type
;
5115 /* For conversions between floating point and integer, there're 2 NARROW
5116 cases. NARROW_SRC is for FLOAT_EXPR, means
5117 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5118 This is safe when the range of the source integer can fit into the lower
5119 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5120 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5121 For other conversions, when there's narrowing, NARROW_DST is used as
5123 enum { NARROW_SRC
, NARROW_DST
, NONE
, WIDEN
} modifier
;
5124 vec
<tree
> vec_oprnds0
= vNULL
;
5125 vec
<tree
> vec_oprnds1
= vNULL
;
5127 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5128 int multi_step_cvt
= 0;
5129 vec
<tree
> interm_types
= vNULL
;
5130 tree intermediate_type
, cvt_type
= NULL_TREE
;
5132 unsigned short fltsz
;
5134 /* Is STMT a vectorizable conversion? */
5136 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5139 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5143 gimple
* stmt
= stmt_info
->stmt
;
5144 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5147 if (gimple_get_lhs (stmt
) == NULL_TREE
5148 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5151 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5154 if (is_gimple_assign (stmt
))
5156 code
= gimple_assign_rhs_code (stmt
);
5157 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5159 else if (gimple_call_internal_p (stmt
))
5161 code
= gimple_call_internal_fn (stmt
);
5162 op_type
= gimple_call_num_args (stmt
);
5167 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5168 || code
== WIDEN_LSHIFT_EXPR
5169 || widening_fn_p (code
));
5172 && !CONVERT_EXPR_CODE_P (code
)
5173 && code
!= FIX_TRUNC_EXPR
5174 && code
!= FLOAT_EXPR
)
5177 /* Check types of lhs and rhs. */
5178 scalar_dest
= gimple_get_lhs (stmt
);
5179 lhs_type
= TREE_TYPE (scalar_dest
);
5180 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5182 /* Check the operands of the operation. */
5183 slp_tree slp_op0
, slp_op1
= NULL
;
5184 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5185 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5189 "use not simple.\n");
5193 rhs_type
= TREE_TYPE (op0
);
5194 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5195 && !((INTEGRAL_TYPE_P (lhs_type
)
5196 && INTEGRAL_TYPE_P (rhs_type
))
5197 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5198 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5201 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5202 && ((INTEGRAL_TYPE_P (lhs_type
)
5203 && !type_has_mode_precision_p (lhs_type
))
5204 || (INTEGRAL_TYPE_P (rhs_type
)
5205 && !type_has_mode_precision_p (rhs_type
))))
5207 if (dump_enabled_p ())
5208 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5209 "type conversion to/from bit-precision unsupported."
5214 if (op_type
== binary_op
)
5216 gcc_assert (code
== WIDEN_MULT_EXPR
5217 || code
== WIDEN_LSHIFT_EXPR
5218 || widening_fn_p (code
));
5220 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5221 gimple_call_arg (stmt
, 0);
5223 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5224 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5226 if (dump_enabled_p ())
5227 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5228 "use not simple.\n");
5231 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5234 vectype_in
= vectype1_in
;
5237 /* If op0 is an external or constant def, infer the vector type
5238 from the scalar type. */
5240 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5242 gcc_assert (vectype_in
);
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5247 "no vectype for scalar type %T\n", rhs_type
);
5252 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5253 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5257 "can't convert between boolean and non "
5258 "boolean vectors %T\n", rhs_type
);
5263 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5264 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5265 if (known_eq (nunits_out
, nunits_in
))
5270 else if (multiple_p (nunits_out
, nunits_in
))
5271 modifier
= NARROW_DST
;
5274 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5278 /* Multiple types in SLP are handled by creating the appropriate number of
5279 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5283 else if (modifier
== NARROW_DST
)
5284 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5286 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5288 /* Sanity check: make sure that at least one copy of the vectorized stmt
5289 needs to be generated. */
5290 gcc_assert (ncopies
>= 1);
5292 bool found_mode
= false;
5293 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5294 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5295 opt_scalar_mode rhs_mode_iter
;
5297 /* Supportable by target? */
5301 if (code
!= FIX_TRUNC_EXPR
5302 && code
!= FLOAT_EXPR
5303 && !CONVERT_EXPR_CODE_P (code
))
5305 gcc_assert (code
.is_tree_code ());
5306 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5313 /* For conversions between float and integer types try whether
5314 we can use intermediate signed integer types to support the
5316 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
5317 && (code
== FLOAT_EXPR
||
5318 (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
5320 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
5321 bool float_expr_p
= code
== FLOAT_EXPR
;
5322 unsigned short target_size
;
5323 scalar_mode intermediate_mode
;
5326 intermediate_mode
= lhs_mode
;
5327 target_size
= GET_MODE_SIZE (rhs_mode
);
5331 target_size
= GET_MODE_SIZE (lhs_mode
);
5332 if (!int_mode_for_size
5333 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
5336 code1
= float_expr_p
? code
: NOP_EXPR
;
5337 codecvt1
= float_expr_p
? NOP_EXPR
: code
;
5338 opt_scalar_mode mode_iter
;
5339 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
5341 intermediate_mode
= mode_iter
.require ();
5343 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
5346 scalar_mode cvt_mode
;
5347 if (!int_mode_for_size
5348 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
5351 cvt_type
= build_nonstandard_integer_type
5352 (GET_MODE_BITSIZE (cvt_mode
), 0);
5354 /* Check if the intermediate type can hold OP0's range.
5355 When converting from float to integer this is not necessary
5356 because values that do not fit the (smaller) target type are
5357 unspecified anyway. */
5358 if (demotion
&& float_expr_p
)
5360 wide_int op_min_value
, op_max_value
;
5361 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5364 if (cvt_type
== NULL_TREE
5365 || (wi::min_precision (op_max_value
, SIGNED
)
5366 > TYPE_PRECISION (cvt_type
))
5367 || (wi::min_precision (op_min_value
, SIGNED
)
5368 > TYPE_PRECISION (cvt_type
)))
5372 cvt_type
= get_vectype_for_scalar_type (vinfo
, cvt_type
, slp_node
);
5373 /* This should only happened for SLP as long as loop vectorizer
5374 only supports same-sized vector. */
5375 if (cvt_type
== NULL_TREE
5376 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nunits_in
)
5377 || !supportable_convert_operation ((tree_code
) code1
,
5380 || !supportable_convert_operation ((tree_code
) codecvt1
,
5392 interm_types
.safe_push (cvt_type
);
5393 cvt_type
= NULL_TREE
;
5401 if (dump_enabled_p ())
5402 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5403 "conversion not supported by target.\n");
5407 if (known_eq (nunits_in
, nunits_out
))
5409 if (!(code
.is_tree_code ()
5410 && supportable_half_widening_operation ((tree_code
) code
,
5411 vectype_out
, vectype_in
,
5415 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5418 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5419 vectype_out
, vectype_in
, &code1
,
5420 &code2
, &multi_step_cvt
,
5423 /* Binary widening operation can only be supported directly by the
5425 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5429 if (code
!= FLOAT_EXPR
5430 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5433 fltsz
= GET_MODE_SIZE (lhs_mode
);
5434 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5436 rhs_mode
= rhs_mode_iter
.require ();
5437 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5441 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5442 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5443 if (cvt_type
== NULL_TREE
)
5446 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5449 gcc_assert (code
.is_tree_code ());
5450 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5455 else if (!supportable_widening_operation (vinfo
, code
,
5456 stmt_info
, vectype_out
,
5457 cvt_type
, &codecvt1
,
5458 &codecvt2
, &multi_step_cvt
,
5462 gcc_assert (multi_step_cvt
== 0);
5464 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5467 &code2
, &multi_step_cvt
,
5478 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5479 codecvt2
= ERROR_MARK
;
5483 interm_types
.safe_push (cvt_type
);
5484 cvt_type
= NULL_TREE
;
5489 gcc_assert (op_type
== unary_op
);
5490 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5491 &code1
, &multi_step_cvt
,
5495 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5498 if (code
== FIX_TRUNC_EXPR
)
5501 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5502 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5503 if (cvt_type
== NULL_TREE
)
5505 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5510 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5511 &code1
, &multi_step_cvt
,
5515 /* If op0 can be represented with low precision integer,
5516 truncate it to cvt_type and the do FLOAT_EXPR. */
5517 else if (code
== FLOAT_EXPR
)
5519 wide_int op_min_value
, op_max_value
;
5520 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5524 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5525 if (cvt_type
== NULL_TREE
5526 || (wi::min_precision (op_max_value
, SIGNED
)
5527 > TYPE_PRECISION (cvt_type
))
5528 || (wi::min_precision (op_min_value
, SIGNED
)
5529 > TYPE_PRECISION (cvt_type
)))
5532 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5533 if (cvt_type
== NULL_TREE
)
5535 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5536 &code1
, &multi_step_cvt
,
5539 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5543 modifier
= NARROW_SRC
;
5554 if (!vec_stmt
) /* transformation not required. */
5557 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5558 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5560 if (dump_enabled_p ())
5561 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5562 "incompatible vector types for invariants\n");
5565 DUMP_VECT_SCOPE ("vectorizable_conversion");
5566 if (modifier
== NONE
)
5568 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5569 vect_model_simple_cost (vinfo
, stmt_info
,
5570 ncopies
* (1 + multi_step_cvt
),
5571 dt
, ndts
, slp_node
, cost_vec
);
5573 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5575 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5576 /* The final packing step produces one vector result per copy. */
5577 unsigned int nvectors
5578 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5579 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5580 multi_step_cvt
, cost_vec
,
5585 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5586 /* The initial unpacking step produces two vector results
5587 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5588 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5589 unsigned int nvectors
5591 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5593 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5594 multi_step_cvt
, cost_vec
,
5597 interm_types
.release ();
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_NOTE
, vect_location
,
5604 "transform conversion. ncopies = %d.\n", ncopies
);
5606 if (op_type
== binary_op
)
5608 if (CONSTANT_CLASS_P (op0
))
5609 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5610 else if (CONSTANT_CLASS_P (op1
))
5611 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5614 /* In case of multi-step conversion, we first generate conversion operations
5615 to the intermediate types, and then from that types to the final one.
5616 We create vector destinations for the intermediate type (TYPES) received
5617 from supportable_*_operation, and store them in the correct order
5618 for future use in vect_create_vectorized_*_stmts (). */
5619 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5620 bool widen_or_narrow_float_p
5621 = cvt_type
&& (modifier
== WIDEN
|| modifier
== NARROW_SRC
);
5622 vec_dest
= vect_create_destination_var (scalar_dest
,
5623 widen_or_narrow_float_p
5624 ? cvt_type
: vectype_out
);
5625 vec_dsts
.quick_push (vec_dest
);
5629 for (i
= interm_types
.length () - 1;
5630 interm_types
.iterate (i
, &intermediate_type
); i
--)
5632 vec_dest
= vect_create_destination_var (scalar_dest
,
5634 vec_dsts
.quick_push (vec_dest
);
5639 vec_dest
= vect_create_destination_var (scalar_dest
,
5640 widen_or_narrow_float_p
5641 ? vectype_out
: cvt_type
);
5646 if (modifier
== WIDEN
)
5648 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5651 ninputs
= vect_pow2 (multi_step_cvt
);
5659 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5661 /* vec_dest is intermediate type operand when multi_step_cvt. */
5665 vec_dest
= vec_dsts
[0];
5668 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5670 /* Arguments are ready, create the new vector stmt. */
5674 gcc_assert (multi_step_cvt
== 1);
5675 new_stmt
= vect_gimple_build (cvt_op
, codecvt1
, vop0
);
5676 new_temp
= make_ssa_name (cvt_op
, new_stmt
);
5677 gimple_assign_set_lhs (new_stmt
, new_temp
);
5678 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5681 new_stmt
= vect_gimple_build (vec_dest
, code1
, vop0
);
5682 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5683 gimple_set_lhs (new_stmt
, new_temp
);
5684 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5687 slp_node
->push_vec_def (new_stmt
);
5689 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5694 /* In case the vectorization factor (VF) is bigger than the number
5695 of elements that we can fit in a vectype (nunits), we have to
5696 generate more than one vector stmt - i.e - we need to "unroll"
5697 the vector stmt by a factor VF/nunits. */
5698 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5700 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5702 if (code
== WIDEN_LSHIFT_EXPR
)
5704 int oprnds_size
= vec_oprnds0
.length ();
5705 vec_oprnds1
.create (oprnds_size
);
5706 for (i
= 0; i
< oprnds_size
; ++i
)
5707 vec_oprnds1
.quick_push (op1
);
5709 /* Arguments are ready. Create the new vector stmts. */
5710 for (i
= multi_step_cvt
; i
>= 0; i
--)
5712 tree this_dest
= vec_dsts
[i
];
5713 code_helper c1
= code1
, c2
= code2
;
5714 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5719 if (known_eq (nunits_out
, nunits_in
))
5720 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
, &vec_oprnds1
,
5721 stmt_info
, this_dest
, gsi
, c1
,
5724 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5725 &vec_oprnds1
, stmt_info
,
5730 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5735 new_temp
= make_ssa_name (vec_dest
);
5736 new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5737 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5740 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5743 slp_node
->push_vec_def (new_stmt
);
5745 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5751 /* In case the vectorization factor (VF) is bigger than the number
5752 of elements that we can fit in a vectype (nunits), we have to
5753 generate more than one vector stmt - i.e - we need to "unroll"
5754 the vector stmt by a factor VF/nunits. */
5755 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5757 /* Arguments are ready. Create the new vector stmts. */
5758 if (cvt_type
&& modifier
== NARROW_DST
)
5759 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5761 new_temp
= make_ssa_name (vec_dest
);
5762 gimple
*new_stmt
= vect_gimple_build (new_temp
, codecvt1
, vop0
);
5763 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5764 vec_oprnds0
[i
] = new_temp
;
5767 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5769 stmt_info
, vec_dsts
, gsi
,
5771 modifier
== NARROW_SRC
);
5772 /* After demoting op0 to cvt_type, convert it to dest. */
5773 if (cvt_type
&& code
== FLOAT_EXPR
)
5775 for (unsigned int i
= 0; i
!= vec_oprnds0
.length() / 2; i
++)
5777 /* Arguments are ready, create the new vector stmt. */
5778 gcc_assert (TREE_CODE_LENGTH ((tree_code
) codecvt1
) == unary_op
);
5780 = vect_gimple_build (vec_dest
, codecvt1
, vec_oprnds0
[i
]);
5781 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5782 gimple_set_lhs (new_stmt
, new_temp
);
5783 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5785 /* This is the last step of the conversion sequence. Store the
5786 vectors in SLP_NODE or in vector info of the scalar statement
5787 (or in STMT_VINFO_RELATED_STMT chain). */
5789 slp_node
->push_vec_def (new_stmt
);
5791 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5797 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5799 vec_oprnds0
.release ();
5800 vec_oprnds1
.release ();
5801 interm_types
.release ();
5806 /* Return true if we can assume from the scalar form of STMT_INFO that
5807 neither the scalar nor the vector forms will generate code. STMT_INFO
5808 is known not to involve a data reference. */
5811 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5813 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5817 tree lhs
= gimple_assign_lhs (stmt
);
5818 tree_code code
= gimple_assign_rhs_code (stmt
);
5819 tree rhs
= gimple_assign_rhs1 (stmt
);
5821 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5824 if (CONVERT_EXPR_CODE_P (code
))
5825 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5830 /* Function vectorizable_assignment.
5832 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5833 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5835 Return true if STMT_INFO is vectorizable in this way. */
5838 vectorizable_assignment (vec_info
*vinfo
,
5839 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5840 gimple
**vec_stmt
, slp_tree slp_node
,
5841 stmt_vector_for_cost
*cost_vec
)
5846 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5848 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5852 vec
<tree
> vec_oprnds
= vNULL
;
5854 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5855 enum tree_code code
;
5858 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5861 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5865 /* Is vectorizable assignment? */
5866 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5870 scalar_dest
= gimple_assign_lhs (stmt
);
5871 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5874 if (STMT_VINFO_DATA_REF (stmt_info
))
5877 code
= gimple_assign_rhs_code (stmt
);
5878 if (!(gimple_assign_single_p (stmt
)
5879 || code
== PAREN_EXPR
5880 || CONVERT_EXPR_CODE_P (code
)))
5883 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5884 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5886 /* Multiple types in SLP are handled by creating the appropriate number of
5887 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5892 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5894 gcc_assert (ncopies
>= 1);
5897 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5898 &dt
[0], &vectype_in
))
5900 if (dump_enabled_p ())
5901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5902 "use not simple.\n");
5906 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5908 /* We can handle NOP_EXPR conversions that do not change the number
5909 of elements or the vector size. */
5910 if ((CONVERT_EXPR_CODE_P (code
)
5911 || code
== VIEW_CONVERT_EXPR
)
5913 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5914 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5915 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5918 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5920 if (dump_enabled_p ())
5921 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5922 "can't convert between boolean and non "
5923 "boolean vectors %T\n", TREE_TYPE (op
));
5928 /* We do not handle bit-precision changes. */
5929 if ((CONVERT_EXPR_CODE_P (code
)
5930 || code
== VIEW_CONVERT_EXPR
)
5931 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5932 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5933 || (INTEGRAL_TYPE_P (TREE_TYPE (op
))
5934 && !type_has_mode_precision_p (TREE_TYPE (op
))))
5935 /* But a conversion that does not change the bit-pattern is ok. */
5936 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5937 && INTEGRAL_TYPE_P (TREE_TYPE (op
))
5938 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5939 > TYPE_PRECISION (TREE_TYPE (op
)))
5940 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5941 || (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5942 == TYPE_PRECISION (TREE_TYPE (op
))))))
5944 if (dump_enabled_p ())
5945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5946 "type conversion to/from bit-precision "
5951 if (!vec_stmt
) /* transformation not required. */
5954 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5956 if (dump_enabled_p ())
5957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5958 "incompatible vector types for invariants\n");
5961 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5962 DUMP_VECT_SCOPE ("vectorizable_assignment");
5963 if (!vect_nop_conversion_p (stmt_info
))
5964 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5974 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5977 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5979 /* Arguments are ready. create the new vector stmt. */
5980 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5982 if (CONVERT_EXPR_CODE_P (code
)
5983 || code
== VIEW_CONVERT_EXPR
)
5984 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5985 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5986 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5987 gimple_assign_set_lhs (new_stmt
, new_temp
);
5988 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5990 slp_node
->push_vec_def (new_stmt
);
5992 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5995 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5997 vec_oprnds
.release ();
6002 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6003 either as shift by a scalar or by a vector. */
6006 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
6009 machine_mode vec_mode
;
6014 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
6018 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6020 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
6022 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6024 || (optab_handler (optab
, TYPE_MODE (vectype
))
6025 == CODE_FOR_nothing
))
6029 vec_mode
= TYPE_MODE (vectype
);
6030 icode
= (int) optab_handler (optab
, vec_mode
);
6031 if (icode
== CODE_FOR_nothing
)
6038 /* Function vectorizable_shift.
6040 Check if STMT_INFO performs a shift operation that can be vectorized.
6041 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6042 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6043 Return true if STMT_INFO is vectorizable in this way. */
6046 vectorizable_shift (vec_info
*vinfo
,
6047 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6048 gimple
**vec_stmt
, slp_tree slp_node
,
6049 stmt_vector_for_cost
*cost_vec
)
6053 tree op0
, op1
= NULL
;
6054 tree vec_oprnd1
= NULL_TREE
;
6056 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6057 enum tree_code code
;
6058 machine_mode vec_mode
;
6062 machine_mode optab_op2_mode
;
6063 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
6065 poly_uint64 nunits_in
;
6066 poly_uint64 nunits_out
;
6071 vec
<tree
> vec_oprnds0
= vNULL
;
6072 vec
<tree
> vec_oprnds1
= vNULL
;
6075 bool scalar_shift_arg
= true;
6076 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6077 bool incompatible_op1_vectype_p
= false;
6079 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6082 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6083 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
6087 /* Is STMT a vectorizable binary/unary operation? */
6088 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6092 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6095 code
= gimple_assign_rhs_code (stmt
);
6097 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
6098 || code
== RROTATE_EXPR
))
6101 scalar_dest
= gimple_assign_lhs (stmt
);
6102 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6103 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6107 "bit-precision shifts not supported.\n");
6112 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6113 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6115 if (dump_enabled_p ())
6116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6117 "use not simple.\n");
6120 /* If op0 is an external or constant def, infer the vector type
6121 from the scalar type. */
6123 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
6125 gcc_assert (vectype
);
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6130 "no vectype for scalar type\n");
6134 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6135 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6136 if (maybe_ne (nunits_out
, nunits_in
))
6139 stmt_vec_info op1_def_stmt_info
;
6141 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
6142 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6146 "use not simple.\n");
6150 /* Multiple types in SLP are handled by creating the appropriate number of
6151 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6156 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6158 gcc_assert (ncopies
>= 1);
6160 /* Determine whether the shift amount is a vector, or scalar. If the
6161 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6163 if ((dt
[1] == vect_internal_def
6164 || dt
[1] == vect_induction_def
6165 || dt
[1] == vect_nested_cycle
)
6167 scalar_shift_arg
= false;
6168 else if (dt
[1] == vect_constant_def
6169 || dt
[1] == vect_external_def
6170 || dt
[1] == vect_internal_def
)
6172 /* In SLP, need to check whether the shift count is the same,
6173 in loops if it is a constant or invariant, it is always
6177 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
6178 stmt_vec_info slpstmt_info
;
6180 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
6182 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
6183 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
6184 scalar_shift_arg
= false;
6187 /* For internal SLP defs we have to make sure we see scalar stmts
6188 for all vector elements.
6189 ??? For different vectors we could resort to a different
6190 scalar shift operand but code-generation below simply always
6192 if (dt
[1] == vect_internal_def
6193 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
6195 scalar_shift_arg
= false;
6198 /* If the shift amount is computed by a pattern stmt we cannot
6199 use the scalar amount directly thus give up and use a vector
6201 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
6202 scalar_shift_arg
= false;
6206 if (dump_enabled_p ())
6207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6208 "operand mode requires invariant argument.\n");
6212 /* Vector shifted by vector. */
6213 bool was_scalar_shift_arg
= scalar_shift_arg
;
6214 if (!scalar_shift_arg
)
6216 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6217 if (dump_enabled_p ())
6218 dump_printf_loc (MSG_NOTE
, vect_location
,
6219 "vector/vector shift/rotate found.\n");
6222 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
6224 incompatible_op1_vectype_p
6225 = (op1_vectype
== NULL_TREE
6226 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
6227 TYPE_VECTOR_SUBPARTS (vectype
))
6228 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
6229 if (incompatible_op1_vectype_p
6231 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
6232 || slp_op1
->refcnt
!= 1))
6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6236 "unusable type for last operand in"
6237 " vector/vector shift/rotate.\n");
6241 /* See if the machine has a vector shifted by scalar insn and if not
6242 then see if it has a vector shifted by vector insn. */
6245 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
6247 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_NOTE
, vect_location
,
6251 "vector/scalar shift/rotate found.\n");
6255 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
6257 && (optab_handler (optab
, TYPE_MODE (vectype
))
6258 != CODE_FOR_nothing
))
6260 scalar_shift_arg
= false;
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_NOTE
, vect_location
,
6264 "vector/vector shift/rotate found.\n");
6267 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
6271 /* Unlike the other binary operators, shifts/rotates have
6272 the rhs being int, instead of the same type as the lhs,
6273 so make sure the scalar is the right type if we are
6274 dealing with vectors of long long/long/short/char. */
6275 incompatible_op1_vectype_p
6277 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
6279 if (incompatible_op1_vectype_p
6280 && dt
[1] == vect_internal_def
)
6282 if (dump_enabled_p ())
6283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6284 "unusable type for last operand in"
6285 " vector/vector shift/rotate.\n");
6292 /* Supportable by target? */
6295 if (dump_enabled_p ())
6296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6300 vec_mode
= TYPE_MODE (vectype
);
6301 icode
= (int) optab_handler (optab
, vec_mode
);
6302 if (icode
== CODE_FOR_nothing
)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6306 "op not supported by target.\n");
6309 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6310 if (vect_emulated_vector_p (vectype
))
6313 if (!vec_stmt
) /* transformation not required. */
6316 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6317 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
6318 && (!incompatible_op1_vectype_p
6319 || dt
[1] == vect_constant_def
)
6320 && !vect_maybe_update_slp_op_vectype
6322 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6326 "incompatible vector types for invariants\n");
6329 /* Now adjust the constant shift amount in place. */
6331 && incompatible_op1_vectype_p
6332 && dt
[1] == vect_constant_def
)
6334 for (unsigned i
= 0;
6335 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6337 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6338 = fold_convert (TREE_TYPE (vectype
),
6339 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6340 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6344 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6345 DUMP_VECT_SCOPE ("vectorizable_shift");
6346 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6347 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6353 if (dump_enabled_p ())
6354 dump_printf_loc (MSG_NOTE
, vect_location
,
6355 "transform binary/unary operation.\n");
6357 if (incompatible_op1_vectype_p
&& !slp_node
)
6359 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6360 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6361 if (dt
[1] != vect_constant_def
)
6362 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6363 TREE_TYPE (vectype
), NULL
);
6367 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6369 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
6371 /* Vector shl and shr insn patterns can be defined with scalar
6372 operand 2 (shift operand). In this case, use constant or loop
6373 invariant op1 directly, without extending it to vector mode
6375 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6376 if (!VECTOR_MODE_P (optab_op2_mode
))
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_NOTE
, vect_location
,
6380 "operand 1 using scalar mode.\n");
6382 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6383 vec_oprnds1
.quick_push (vec_oprnd1
);
6384 /* Store vec_oprnd1 for every vector stmt to be created.
6385 We check during the analysis that all the shift arguments
6387 TODO: Allow different constants for different vector
6388 stmts generated for an SLP instance. */
6390 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6391 vec_oprnds1
.quick_push (vec_oprnd1
);
6394 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6396 if (was_scalar_shift_arg
)
6398 /* If the argument was the same in all lanes create
6399 the correctly typed vector shift amount directly. */
6400 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6401 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6402 !loop_vinfo
? gsi
: NULL
);
6403 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6404 !loop_vinfo
? gsi
: NULL
);
6405 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6406 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6407 vec_oprnds1
.quick_push (vec_oprnd1
);
6409 else if (dt
[1] == vect_constant_def
)
6410 /* The constant shift amount has been adjusted in place. */
6413 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6416 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6417 (a special case for certain kind of vector shifts); otherwise,
6418 operand 1 should be of a vector type (the usual case). */
6419 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6421 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6423 /* Arguments are ready. Create the new vector stmt. */
6424 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6426 /* For internal defs where we need to use a scalar shift arg
6427 extract the first lane. */
6428 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6430 vop1
= vec_oprnds1
[0];
6431 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6433 = gimple_build_assign (new_temp
,
6434 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6436 TYPE_SIZE (TREE_TYPE (new_temp
)),
6437 bitsize_zero_node
));
6438 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6442 vop1
= vec_oprnds1
[i
];
6443 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6444 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6445 gimple_assign_set_lhs (new_stmt
, new_temp
);
6446 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6448 slp_node
->push_vec_def (new_stmt
);
6450 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6454 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6456 vec_oprnds0
.release ();
6457 vec_oprnds1
.release ();
6462 /* Function vectorizable_operation.
6464 Check if STMT_INFO performs a binary, unary or ternary operation that can
6466 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6467 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6468 Return true if STMT_INFO is vectorizable in this way. */
6471 vectorizable_operation (vec_info
*vinfo
,
6472 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6473 gimple
**vec_stmt
, slp_tree slp_node
,
6474 stmt_vector_for_cost
*cost_vec
)
6478 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6480 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6481 enum tree_code code
, orig_code
;
6482 machine_mode vec_mode
;
6486 bool target_support_p
;
6487 enum vect_def_type dt
[3]
6488 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6490 poly_uint64 nunits_in
;
6491 poly_uint64 nunits_out
;
6493 int ncopies
, vec_num
;
6495 vec
<tree
> vec_oprnds0
= vNULL
;
6496 vec
<tree
> vec_oprnds1
= vNULL
;
6497 vec
<tree
> vec_oprnds2
= vNULL
;
6498 tree vop0
, vop1
, vop2
;
6499 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6501 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6504 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6508 /* Is STMT a vectorizable binary/unary operation? */
6509 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6513 /* Loads and stores are handled in vectorizable_{load,store}. */
6514 if (STMT_VINFO_DATA_REF (stmt_info
))
6517 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6519 /* Shifts are handled in vectorizable_shift. */
6520 if (code
== LSHIFT_EXPR
6521 || code
== RSHIFT_EXPR
6522 || code
== LROTATE_EXPR
6523 || code
== RROTATE_EXPR
)
6526 /* Comparisons are handled in vectorizable_comparison. */
6527 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6530 /* Conditions are handled in vectorizable_condition. */
6531 if (code
== COND_EXPR
)
6534 /* For pointer addition and subtraction, we should use the normal
6535 plus and minus for the vector operation. */
6536 if (code
== POINTER_PLUS_EXPR
)
6538 if (code
== POINTER_DIFF_EXPR
)
6541 /* Support only unary or binary operations. */
6542 op_type
= TREE_CODE_LENGTH (code
);
6543 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6545 if (dump_enabled_p ())
6546 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6547 "num. args = %d (not unary/binary/ternary op).\n",
6552 scalar_dest
= gimple_assign_lhs (stmt
);
6553 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6555 /* Most operations cannot handle bit-precision types without extra
6557 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6559 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6560 /* Exception are bitwise binary operations. */
6561 && code
!= BIT_IOR_EXPR
6562 && code
!= BIT_XOR_EXPR
6563 && code
!= BIT_AND_EXPR
)
6565 if (dump_enabled_p ())
6566 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6567 "bit-precision arithmetic not supported.\n");
6572 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6573 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6577 "use not simple.\n");
6580 bool is_invariant
= (dt
[0] == vect_external_def
6581 || dt
[0] == vect_constant_def
);
6582 /* If op0 is an external or constant def, infer the vector type
6583 from the scalar type. */
6586 /* For boolean type we cannot determine vectype by
6587 invariant value (don't know whether it is a vector
6588 of booleans or vector of integers). We use output
6589 vectype because operations on boolean don't change
6591 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6593 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6595 if (dump_enabled_p ())
6596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6597 "not supported operation on bool value.\n");
6600 vectype
= vectype_out
;
6603 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6607 gcc_assert (vectype
);
6610 if (dump_enabled_p ())
6611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6612 "no vectype for scalar type %T\n",
6618 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6619 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6620 if (maybe_ne (nunits_out
, nunits_in
))
6623 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6624 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6625 if (op_type
== binary_op
|| op_type
== ternary_op
)
6627 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6628 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6630 if (dump_enabled_p ())
6631 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6632 "use not simple.\n");
6635 is_invariant
&= (dt
[1] == vect_external_def
6636 || dt
[1] == vect_constant_def
);
6638 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6641 if (op_type
== ternary_op
)
6643 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6644 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6648 "use not simple.\n");
6651 is_invariant
&= (dt
[2] == vect_external_def
6652 || dt
[2] == vect_constant_def
);
6654 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
6658 /* Multiple types in SLP are handled by creating the appropriate number of
6659 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6664 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6668 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6672 gcc_assert (ncopies
>= 1);
6674 /* Reject attempts to combine mask types with nonmask types, e.g. if
6675 we have an AND between a (nonmask) boolean loaded from memory and
6676 a (mask) boolean result of a comparison.
6678 TODO: We could easily fix these cases up using pattern statements. */
6679 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6680 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6681 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6685 "mixed mask and nonmask vector types\n");
6689 /* Supportable by target? */
6691 vec_mode
= TYPE_MODE (vectype
);
6692 if (code
== MULT_HIGHPART_EXPR
)
6693 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6696 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6704 target_support_p
= (optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
6705 || optab_libfunc (optab
, vec_mode
));
6708 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6709 if (!target_support_p
|| using_emulated_vectors_p
)
6711 if (dump_enabled_p ())
6712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6713 "op not supported by target.\n");
6714 /* When vec_mode is not a vector mode and we verified ops we
6715 do not have to lower like AND are natively supported let
6716 those through even when the mode isn't word_mode. For
6717 ops we have to lower the lowering code assumes we are
6718 dealing with word_mode. */
6719 if ((((code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6720 || !target_support_p
)
6721 && maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
))
6722 /* Check only during analysis. */
6723 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6725 if (dump_enabled_p ())
6726 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6729 if (dump_enabled_p ())
6730 dump_printf_loc (MSG_NOTE
, vect_location
,
6731 "proceeding using word mode.\n");
6732 using_emulated_vectors_p
= true;
6735 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6736 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6737 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
6738 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6739 internal_fn cond_len_fn
= get_conditional_len_internal_fn (code
);
6741 /* If operating on inactive elements could generate spurious traps,
6742 we need to restrict the operation to active lanes. Note that this
6743 specifically doesn't apply to unhoisted invariants, since they
6744 operate on the same value for every lane.
6746 Similarly, if this operation is part of a reduction, a fully-masked
6747 loop should only change the active lanes of the reduction chain,
6748 keeping the inactive lanes as-is. */
6749 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6752 if (!vec_stmt
) /* transformation not required. */
6755 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6756 && mask_out_inactive
)
6758 if (cond_len_fn
!= IFN_LAST
6759 && direct_internal_fn_supported_p (cond_len_fn
, vectype
,
6760 OPTIMIZE_FOR_SPEED
))
6761 vect_record_loop_len (loop_vinfo
, lens
, ncopies
* vec_num
, vectype
,
6763 else if (cond_fn
!= IFN_LAST
6764 && direct_internal_fn_supported_p (cond_fn
, vectype
,
6765 OPTIMIZE_FOR_SPEED
))
6766 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6772 "can't use a fully-masked loop because no"
6773 " conditional operation is available.\n");
6774 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6778 /* Put types on constant and invariant SLP children. */
6780 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6781 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6782 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6784 if (dump_enabled_p ())
6785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6786 "incompatible vector types for invariants\n");
6790 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6791 DUMP_VECT_SCOPE ("vectorizable_operation");
6792 vect_model_simple_cost (vinfo
, stmt_info
,
6793 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6794 if (using_emulated_vectors_p
)
6796 /* The above vect_model_simple_cost call handles constants
6797 in the prologue and (mis-)costs one of the stmts as
6798 vector stmt. See below for the actual lowering that will
6801 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6814 /* Bit operations do not have extra cost and are accounted
6815 as vector stmt by vect_model_simple_cost. */
6821 /* We also need to materialize two large constants. */
6822 record_stmt_cost (cost_vec
, 2, scalar_stmt
, stmt_info
,
6824 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
,
6833 if (dump_enabled_p ())
6834 dump_printf_loc (MSG_NOTE
, vect_location
,
6835 "transform binary/unary operation.\n");
6837 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6838 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
6840 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6841 vectors with unsigned elements, but the result is signed. So, we
6842 need to compute the MINUS_EXPR into vectype temporary and
6843 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6844 tree vec_cvt_dest
= NULL_TREE
;
6845 if (orig_code
== POINTER_DIFF_EXPR
)
6847 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6848 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6852 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6854 /* In case the vectorization factor (VF) is bigger than the number
6855 of elements that we can fit in a vectype (nunits), we have to generate
6856 more than one vector stmt - i.e - we need to "unroll" the
6857 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6858 from one copy of the vector stmt to the next, in the field
6859 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6860 stages to find the correct vector defs to be used when vectorizing
6861 stmts that use the defs of the current stmt. The example below
6862 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6863 we need to create 4 vectorized stmts):
6865 before vectorization:
6866 RELATED_STMT VEC_STMT
6870 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6872 RELATED_STMT VEC_STMT
6873 VS1_0: vx0 = memref0 VS1_1 -
6874 VS1_1: vx1 = memref1 VS1_2 -
6875 VS1_2: vx2 = memref2 VS1_3 -
6876 VS1_3: vx3 = memref3 - -
6877 S1: x = load - VS1_0
6880 step2: vectorize stmt S2 (done here):
6881 To vectorize stmt S2 we first need to find the relevant vector
6882 def for the first operand 'x'. This is, as usual, obtained from
6883 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6884 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6885 relevant vector def 'vx0'. Having found 'vx0' we can generate
6886 the vector stmt VS2_0, and as usual, record it in the
6887 STMT_VINFO_VEC_STMT of stmt S2.
6888 When creating the second copy (VS2_1), we obtain the relevant vector
6889 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6890 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6891 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6892 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6893 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6894 chain of stmts and pointers:
6895 RELATED_STMT VEC_STMT
6896 VS1_0: vx0 = memref0 VS1_1 -
6897 VS1_1: vx1 = memref1 VS1_2 -
6898 VS1_2: vx2 = memref2 VS1_3 -
6899 VS1_3: vx3 = memref3 - -
6900 S1: x = load - VS1_0
6901 VS2_0: vz0 = vx0 + v1 VS2_1 -
6902 VS2_1: vz1 = vx1 + v1 VS2_2 -
6903 VS2_2: vz2 = vx2 + v1 VS2_3 -
6904 VS2_3: vz3 = vx3 + v1 - -
6905 S2: z = x + 1 - VS2_0 */
6907 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6908 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6909 /* Arguments are ready. Create the new vector stmt. */
6910 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6912 gimple
*new_stmt
= NULL
;
6913 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6914 ? vec_oprnds1
[i
] : NULL_TREE
);
6915 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6916 if (using_emulated_vectors_p
6917 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
))
6919 /* Lower the operation. This follows vector lowering. */
6920 unsigned int width
= vector_element_bits (vectype
);
6921 tree inner_type
= TREE_TYPE (vectype
);
6923 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode
), 1);
6924 HOST_WIDE_INT max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
6925 tree low_bits
= build_replicated_int_cst (word_type
, width
, max
>> 1);
6927 = build_replicated_int_cst (word_type
, width
, max
& ~(max
>> 1));
6928 tree wvop0
= make_ssa_name (word_type
);
6929 new_stmt
= gimple_build_assign (wvop0
, VIEW_CONVERT_EXPR
,
6930 build1 (VIEW_CONVERT_EXPR
,
6932 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6933 tree result_low
, signs
;
6934 if (code
== PLUS_EXPR
|| code
== MINUS_EXPR
)
6936 tree wvop1
= make_ssa_name (word_type
);
6937 new_stmt
= gimple_build_assign (wvop1
, VIEW_CONVERT_EXPR
,
6938 build1 (VIEW_CONVERT_EXPR
,
6940 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6941 signs
= make_ssa_name (word_type
);
6942 new_stmt
= gimple_build_assign (signs
,
6943 BIT_XOR_EXPR
, wvop0
, wvop1
);
6944 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6945 tree b_low
= make_ssa_name (word_type
);
6946 new_stmt
= gimple_build_assign (b_low
,
6947 BIT_AND_EXPR
, wvop1
, low_bits
);
6948 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6949 tree a_low
= make_ssa_name (word_type
);
6950 if (code
== PLUS_EXPR
)
6951 new_stmt
= gimple_build_assign (a_low
,
6952 BIT_AND_EXPR
, wvop0
, low_bits
);
6954 new_stmt
= gimple_build_assign (a_low
,
6955 BIT_IOR_EXPR
, wvop0
, high_bits
);
6956 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6957 if (code
== MINUS_EXPR
)
6959 new_stmt
= gimple_build_assign (NULL_TREE
,
6960 BIT_NOT_EXPR
, signs
);
6961 signs
= make_ssa_name (word_type
);
6962 gimple_assign_set_lhs (new_stmt
, signs
);
6963 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6965 new_stmt
= gimple_build_assign (NULL_TREE
,
6966 BIT_AND_EXPR
, signs
, high_bits
);
6967 signs
= make_ssa_name (word_type
);
6968 gimple_assign_set_lhs (new_stmt
, signs
);
6969 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6970 result_low
= make_ssa_name (word_type
);
6971 new_stmt
= gimple_build_assign (result_low
, code
, a_low
, b_low
);
6972 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6976 tree a_low
= make_ssa_name (word_type
);
6977 new_stmt
= gimple_build_assign (a_low
,
6978 BIT_AND_EXPR
, wvop0
, low_bits
);
6979 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6980 signs
= make_ssa_name (word_type
);
6981 new_stmt
= gimple_build_assign (signs
, BIT_NOT_EXPR
, wvop0
);
6982 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6983 new_stmt
= gimple_build_assign (NULL_TREE
,
6984 BIT_AND_EXPR
, signs
, high_bits
);
6985 signs
= make_ssa_name (word_type
);
6986 gimple_assign_set_lhs (new_stmt
, signs
);
6987 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6988 result_low
= make_ssa_name (word_type
);
6989 new_stmt
= gimple_build_assign (result_low
,
6990 MINUS_EXPR
, high_bits
, a_low
);
6991 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6993 new_stmt
= gimple_build_assign (NULL_TREE
, BIT_XOR_EXPR
, result_low
,
6995 result_low
= make_ssa_name (word_type
);
6996 gimple_assign_set_lhs (new_stmt
, result_low
);
6997 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6998 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
,
6999 build1 (VIEW_CONVERT_EXPR
,
7000 vectype
, result_low
));
7001 new_temp
= make_ssa_name (vectype
);
7002 gimple_assign_set_lhs (new_stmt
, new_temp
);
7003 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7005 else if ((masked_loop_p
|| len_loop_p
) && mask_out_inactive
)
7009 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7010 vec_num
* ncopies
, vectype
, i
);
7013 mask
= build_minus_one_cst (truth_type_for (vectype
));
7014 auto_vec
<tree
> vops (6);
7015 vops
.quick_push (mask
);
7016 vops
.quick_push (vop0
);
7018 vops
.quick_push (vop1
);
7020 vops
.quick_push (vop2
);
7023 /* Perform the operation on active elements only and take
7024 inactive elements from the reduction chain input. */
7026 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
7030 auto else_value
= targetm
.preferred_else_value
7031 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
7032 vops
.quick_push (else_value
);
7036 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
7037 vec_num
* ncopies
, vectype
, i
, 1);
7039 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
7040 tree bias
= build_int_cst (intQI_type_node
, biasval
);
7041 vops
.quick_push (len
);
7042 vops
.quick_push (bias
);
7045 = gimple_build_call_internal_vec (masked_loop_p
? cond_fn
7048 new_temp
= make_ssa_name (vec_dest
, call
);
7049 gimple_call_set_lhs (call
, new_temp
);
7050 gimple_call_set_nothrow (call
, true);
7051 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7056 tree mask
= NULL_TREE
;
7057 /* When combining two masks check if either of them is elsewhere
7058 combined with a loop mask, if that's the case we can mark that the
7059 new combined mask doesn't need to be combined with a loop mask. */
7061 && code
== BIT_AND_EXPR
7062 && VECTOR_BOOLEAN_TYPE_P (vectype
))
7064 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
7067 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7068 vec_num
* ncopies
, vectype
, i
);
7070 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7074 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
7077 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
,
7078 vec_num
* ncopies
, vectype
, i
);
7080 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
7085 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
7086 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7087 gimple_assign_set_lhs (new_stmt
, new_temp
);
7088 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7089 if (using_emulated_vectors_p
)
7090 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
7092 /* Enter the combined value into the vector cond hash so we don't
7093 AND it with a loop mask again. */
7095 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
7100 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
7101 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
7103 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
7104 gimple_assign_set_lhs (new_stmt
, new_temp
);
7105 vect_finish_stmt_generation (vinfo
, stmt_info
,
7110 slp_node
->push_vec_def (new_stmt
);
7112 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7116 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7118 vec_oprnds0
.release ();
7119 vec_oprnds1
.release ();
7120 vec_oprnds2
.release ();
7125 /* A helper function to ensure data reference DR_INFO's base alignment. */
/* NOTE(review): this chunk is a garbled extraction -- each original source
   line is split across several physical lines and some lines (return type,
   braces) are missing.  Comments below annotate the visible fragments.  */
7128 ensure_base_align (dr_vec_info
*dr_info
)
7130 /* Alignment is only analyzed for the first element of a DR group,
7131 use that to look at base alignment we need to enforce. */
7132 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
7133 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
/* Misalignment must already have been computed by the alignment analysis
   phase before we are asked to enforce base alignment.  */
7135 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
7137 if (dr_info
->base_misaligned
)
7139 tree base_decl
= dr_info
->base_decl
;
7141 // We should only be able to increase the alignment of a base object if
7142 // we know what its new alignment should be at compile time.
7143 unsigned HOST_WIDE_INT align_base_to
=
7144 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
/* Decls visible in the symbol table get their alignment raised through the
   symtab node so every reference agrees; otherwise bump DECL_ALIGN on the
   local decl directly (only ever increasing it).  */
7146 if (decl_in_symtab_p (base_decl
))
7147 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
7148 else if (DECL_ALIGN (base_decl
) < align_base_to
)
7150 SET_DECL_ALIGN (base_decl
, align_base_to
);
/* Mark as user-aligned so later passes do not reduce the alignment again.  */
7151 DECL_USER_ALIGN (base_decl
) = 1;
/* Record that the base has been realigned so this work is done once.  */
7153 dr_info
->base_misaligned
= false;
7158 /* Function get_group_alias_ptr_type.
7160 Return the alias type for the group starting at FIRST_STMT_INFO. */
7163 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
7165 struct data_reference
*first_dr
, *next_dr
;
7167 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
7168 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
/* Walk every member of the interleaving group.  If any member's alias set
   conflicts with the first member's, the whole group must be accessed
   through ptr_type_node (the alias-everything pointer type).  */
7169 while (next_stmt_info
)
7171 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
7172 if (get_alias_set (DR_REF (first_dr
))
7173 != get_alias_set (DR_REF (next_dr
)))
7175 if (dump_enabled_p ())
7176 dump_printf_loc (MSG_NOTE
, vect_location
,
7177 "conflicting alias set types.\n");
7178 return ptr_type_node
;
7180 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* All group members share the first member's alias set, so its own alias
   pointer type is valid for the whole group.  */
7182 return reference_alias_ptr_type (DR_REF (first_dr
));
7186 /* Function scan_operand_equal_p.
7188 Helper function for check_scan_store. Compare two references
7189 with .GOMP_SIMD_LANE bases. */
/* Returns whether REF1 and REF2 are equivalent accesses into "omp simd
   array" variables: same base, same bit-size, and offsets that reduce to
   the same index expression scaled by the same step.  */
7192 scan_operand_equal_p (tree ref1
, tree ref2
)
7194 tree ref
[2] = { ref1
, ref2
};
7195 poly_int64 bitsize
[2], bitpos
[2];
7196 tree offset
[2], base
[2];
/* Decompose both references into base/offset/size components.  */
7197 for (int i
= 0; i
< 2; ++i
)
7200 int unsignedp
, reversep
, volatilep
= 0;
7201 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
7202 &offset
[i
], &mode
, &unsignedp
,
7203 &reversep
, &volatilep
);
/* Reverse storage order, volatile accesses or a non-zero bit position
   disqualify the pair immediately.  */
7204 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
/* Look through a MEM_REF whose address is an SSA name defined by
   &array p+ index, folding the index into OFFSET so both references
   are compared in base+offset form.  */
7206 if (TREE_CODE (base
[i
]) == MEM_REF
7207 && offset
[i
] == NULL_TREE
7208 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
7210 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
7211 if (is_gimple_assign (def_stmt
)
7212 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
7213 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
7214 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
7216 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
7218 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
7219 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
/* Bases and access sizes must match exactly.  */
7224 if (!operand_equal_p (base
[0], base
[1], 0))
7226 if (maybe_ne (bitsize
[0], bitsize
[1]))
/* Identical offset trees (including both NULL) compare equal; otherwise
   both must be present and structurally equal, or be decomposed into
   index * step below.  */
7228 if (offset
[0] != offset
[1])
7230 if (!offset
[0] || !offset
[1])
7232 if (!operand_equal_p (offset
[0], offset
[1], 0))
/* Split each offset into an index expression and a constant step,
   defaulting the step to 1 when no multiplication is found.  */
7235 for (int i
= 0; i
< 2; ++i
)
7237 step
[i
] = integer_one_node
;
7238 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7240 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7241 if (is_gimple_assign (def_stmt
)
7242 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
7243 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
7246 step
[i
] = gimple_assign_rhs2 (def_stmt
);
7247 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
7250 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
7252 step
[i
] = TREE_OPERAND (offset
[i
], 1);
7253 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
/* Strip a widening (precision non-decreasing) integral conversion from
   the index so e.g. (sizetype) idx and idx compare equal.  */
7255 tree rhs1
= NULL_TREE
;
7256 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
7258 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
7259 if (gimple_assign_cast_p (def_stmt
))
7260 rhs1
= gimple_assign_rhs1 (def_stmt
);
7262 else if (CONVERT_EXPR_P (offset
[i
]))
7263 rhs1
= TREE_OPERAND (offset
[i
], 0);
7265 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
7266 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
7267 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
7268 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
/* Finally both index expressions and both steps must agree.  */
7271 if (!operand_equal_p (offset
[0], offset
[1], 0)
7272 || !operand_equal_p (step
[0], step
[1], 0))
/* Kinds of per-step operations used when vectorizing #pragma omp scan
   stores: either a permutation, or a whole-vector left shift (optionally
   followed by a VEC_COND_EXPR to merge in a non-zero initializer).  */
7280 enum scan_store_kind
{
7281 /* Normal permutation. */
7282 scan_store_kind_perm
,
7284 /* Whole vector left shift permutation with zero init. */
7285 scan_store_kind_lshift_zero
,
7287 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7288 scan_store_kind_lshift_cond
7291 /* Function scan_store_can_perm_p (upstream comment mislabels this as
   check_scan_store).
7293 Verify if we can perform the needed permutations or whole vector shifts.
7294 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7295 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7296 to do at each step. */
7299 scan_store_can_perm_p (tree vectype
, tree init
,
7300 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
7302 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7303 unsigned HOST_WIDE_INT nunits
;
/* The scan lowering needs a compile-time constant, power-of-two number
   of vector elements.  */
7304 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7306 int units_log2
= exact_log2 (nunits
);
7307 if (units_log2
<= 0)
7311 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
/* One iteration per scan step, plus a final one (i == units_log2) for
   the broadcast of the last element.  */
7312 for (i
= 0; i
<= units_log2
; ++i
)
7314 unsigned HOST_WIDE_INT j
, k
;
7315 enum scan_store_kind kind
= scan_store_kind_perm
;
7316 vec_perm_builder
sel (nunits
, nunits
, 1);
7317 sel
.quick_grow (nunits
);
7318 if (i
== units_log2
)
/* Final step: splat the last lane across the whole vector.  */
7320 for (j
= 0; j
< nunits
; ++j
)
7321 sel
[j
] = nunits
- 1;
/* Step i: shift lanes up by 2^i, filling the low 2^i lanes from the
   second permutation input.  */
7325 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7327 for (k
= 0; j
< nunits
; ++j
, ++k
)
7328 sel
[j
] = nunits
+ k
;
7330 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
/* If the target cannot do this permutation directly, try to fall back
   to a whole-vector left shift (vec_shl), possibly combined with a
   VEC_COND_EXPR when the initializer is not all-zeros.  */
7331 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
7333 if (i
== units_log2
)
7336 if (whole_vector_shift_kind
== scan_store_kind_perm
)
7338 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
7340 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
7341 /* Whole vector shifts shift in zeros, so if init is all zero
7342 constant, there is no need to do anything further. */
7343 if ((TREE_CODE (init
) != INTEGER_CST
7344 && TREE_CODE (init
) != REAL_CST
)
7345 || !initializer_zerop (init
))
7347 tree masktype
= truth_type_for (vectype
);
7348 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
7350 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
7353 kind
= whole_vector_shift_kind
;
/* Record the chosen kind per step for the caller; only start pushing
   once some step deviates from a plain permutation (earlier steps are
   grow-cleared to scan_store_kind_perm).  */
7355 if (use_whole_vector
)
7357 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
7358 use_whole_vector
->safe_grow_cleared (i
, true);
7359 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7360 use_whole_vector
->safe_push (kind
);
7368 /* Function check_scan_store.
7370 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7373 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7374 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7375 vect_memory_access_type memory_access_type
)
7377 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7378 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7381 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7384 || memory_access_type
!= VMAT_CONTIGUOUS
7385 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7386 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7387 || loop_vinfo
== NULL
7388 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7389 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7390 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7391 || !integer_zerop (DR_INIT (dr_info
->dr
))
7392 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7393 || !alias_sets_conflict_p (get_alias_set (vectype
),
7394 get_alias_set (TREE_TYPE (ref_type
))))
7396 if (dump_enabled_p ())
7397 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7398 "unsupported OpenMP scan store.\n");
7402 /* We need to pattern match code built by OpenMP lowering and simplified
7403 by following optimizations into something we can handle.
7404 #pragma omp simd reduction(inscan,+:r)
7408 #pragma omp scan inclusive (r)
7411 shall have body with:
7412 // Initialization for input phase, store the reduction initializer:
7413 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7414 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7416 // Actual input phase:
7418 r.0_5 = D.2042[_20];
7421 // Initialization for scan phase:
7422 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7428 // Actual scan phase:
7430 r.1_8 = D.2042[_20];
7432 The "omp simd array" variable D.2042 holds the privatized copy used
7433 inside of the loop and D.2043 is another one that holds copies of
7434 the current original list item. The separate GOMP_SIMD_LANE ifn
7435 kinds are there in order to allow optimizing the initializer store
7436 and combiner sequence, e.g. if it is originally some C++ish user
7437 defined reduction, but allow the vectorizer to pattern recognize it
7438 and turn into the appropriate vectorized scan.
7440 For exclusive scan, this is slightly different:
7441 #pragma omp simd reduction(inscan,+:r)
7445 #pragma omp scan exclusive (r)
7448 shall have body with:
7449 // Initialization for input phase, store the reduction initializer:
7450 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7451 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7453 // Actual input phase:
7455 r.0_5 = D.2042[_20];
7458 // Initialization for scan phase:
7459 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7465 // Actual scan phase:
7467 r.1_8 = D.2044[_20];
7470 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7472 /* Match the D.2042[_21] = 0; store above. Just require that
7473 it is a constant or external definition store. */
7474 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7477 if (dump_enabled_p ())
7478 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7479 "unsupported OpenMP scan initializer store.\n");
7483 if (! loop_vinfo
->scan_map
)
7484 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7485 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7486 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7489 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7491 /* These stores can be vectorized normally. */
7495 if (rhs_dt
!= vect_internal_def
)
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7500 "unsupported OpenMP scan combiner pattern.\n");
7504 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7505 tree rhs
= gimple_assign_rhs1 (stmt
);
7506 if (TREE_CODE (rhs
) != SSA_NAME
)
7509 gimple
*other_store_stmt
= NULL
;
7510 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7511 bool inscan_var_store
7512 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7514 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7516 if (!inscan_var_store
)
7518 use_operand_p use_p
;
7519 imm_use_iterator iter
;
7520 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7522 gimple
*use_stmt
= USE_STMT (use_p
);
7523 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7525 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7526 || !is_gimple_assign (use_stmt
)
7527 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7529 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7531 other_store_stmt
= use_stmt
;
7533 if (other_store_stmt
== NULL
)
7535 rhs
= gimple_assign_lhs (other_store_stmt
);
7536 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7540 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7542 use_operand_p use_p
;
7543 imm_use_iterator iter
;
7544 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7546 gimple
*use_stmt
= USE_STMT (use_p
);
7547 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7549 if (other_store_stmt
)
7551 other_store_stmt
= use_stmt
;
7557 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7558 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
7559 || !is_gimple_assign (def_stmt
)
7560 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
7563 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7564 /* For pointer addition, we should use the normal plus for the vector
7568 case POINTER_PLUS_EXPR
:
7571 case MULT_HIGHPART_EXPR
:
7576 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
7579 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7580 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7581 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
7584 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7585 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7586 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7587 || !gimple_assign_load_p (load1_stmt
)
7588 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7589 || !gimple_assign_load_p (load2_stmt
))
7592 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7593 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7594 if (load1_stmt_info
== NULL
7595 || load2_stmt_info
== NULL
7596 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7597 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7598 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7599 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7602 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7604 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7605 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7606 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7608 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7610 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7614 use_operand_p use_p
;
7615 imm_use_iterator iter
;
7616 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7618 gimple
*use_stmt
= USE_STMT (use_p
);
7619 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7621 if (other_store_stmt
)
7623 other_store_stmt
= use_stmt
;
7627 if (other_store_stmt
== NULL
)
7629 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7630 || !gimple_store_p (other_store_stmt
))
7633 stmt_vec_info other_store_stmt_info
7634 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7635 if (other_store_stmt_info
== NULL
7636 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7637 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7640 gimple
*stmt1
= stmt
;
7641 gimple
*stmt2
= other_store_stmt
;
7642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7643 std::swap (stmt1
, stmt2
);
7644 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7645 gimple_assign_rhs1 (load2_stmt
)))
7647 std::swap (rhs1
, rhs2
);
7648 std::swap (load1_stmt
, load2_stmt
);
7649 std::swap (load1_stmt_info
, load2_stmt_info
);
7651 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7652 gimple_assign_rhs1 (load1_stmt
)))
7655 tree var3
= NULL_TREE
;
7656 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7657 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7658 gimple_assign_rhs1 (load2_stmt
)))
7660 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7662 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7663 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7664 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7666 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7667 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7668 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7669 || lookup_attribute ("omp simd inscan exclusive",
7670 DECL_ATTRIBUTES (var3
)))
7674 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7675 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7676 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7679 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7680 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7681 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7682 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7683 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7684 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7687 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7688 std::swap (var1
, var2
);
7690 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7692 if (!lookup_attribute ("omp simd inscan exclusive",
7693 DECL_ATTRIBUTES (var1
)))
7698 if (loop_vinfo
->scan_map
== NULL
)
7700 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7704 /* The IL is as expected, now check if we can actually vectorize it.
7711 should be vectorized as (where _40 is the vectorized rhs
7712 from the D.2042[_21] = 0; store):
7713 _30 = MEM <vector(8) int> [(int *)&D.2043];
7714 _31 = MEM <vector(8) int> [(int *)&D.2042];
7715 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7717 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7718 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7720 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7721 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7722 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7724 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7725 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7727 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7728 MEM <vector(8) int> [(int *)&D.2043] = _39;
7729 MEM <vector(8) int> [(int *)&D.2042] = _38;
7736 should be vectorized as (where _40 is the vectorized rhs
7737 from the D.2042[_21] = 0; store):
7738 _30 = MEM <vector(8) int> [(int *)&D.2043];
7739 _31 = MEM <vector(8) int> [(int *)&D.2042];
7740 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7741 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7743 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7744 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7745 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7747 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7748 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7749 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7751 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7752 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7755 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7756 MEM <vector(8) int> [(int *)&D.2044] = _39;
7757 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7758 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7759 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7760 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7763 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7764 if (units_log2
== -1)
7771 /* Function vectorizable_scan_store.
7773 Helper of vectorizable_score, arguments like on vectorizable_store.
7774 Handle only the transformation, checking is done in check_scan_store. */
7777 vectorizable_scan_store (vec_info
*vinfo
,
7778 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7779 gimple
**vec_stmt
, int ncopies
)
7781 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7782 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7783 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7784 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7786 if (dump_enabled_p ())
7787 dump_printf_loc (MSG_NOTE
, vect_location
,
7788 "transform scan store. ncopies = %d\n", ncopies
);
7790 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7791 tree rhs
= gimple_assign_rhs1 (stmt
);
7792 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7794 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7795 bool inscan_var_store
7796 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7798 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7800 use_operand_p use_p
;
7801 imm_use_iterator iter
;
7802 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7804 gimple
*use_stmt
= USE_STMT (use_p
);
7805 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7807 rhs
= gimple_assign_lhs (use_stmt
);
7812 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7813 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7814 if (code
== POINTER_PLUS_EXPR
)
7816 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7817 && commutative_tree_code (code
));
7818 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7819 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7820 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7821 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7822 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7823 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7824 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7825 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7826 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7827 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7828 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7832 std::swap (rhs1
, rhs2
);
7833 std::swap (var1
, var2
);
7834 std::swap (load1_dr_info
, load2_dr_info
);
7837 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7840 unsigned HOST_WIDE_INT nunits
;
7841 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7843 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7844 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7845 gcc_assert (units_log2
> 0);
7846 auto_vec
<tree
, 16> perms
;
7847 perms
.quick_grow (units_log2
+ 1);
7848 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7849 for (int i
= 0; i
<= units_log2
; ++i
)
7851 unsigned HOST_WIDE_INT j
, k
;
7852 vec_perm_builder
sel (nunits
, nunits
, 1);
7853 sel
.quick_grow (nunits
);
7854 if (i
== units_log2
)
7855 for (j
= 0; j
< nunits
; ++j
)
7856 sel
[j
] = nunits
- 1;
7859 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7861 for (k
= 0; j
< nunits
; ++j
, ++k
)
7862 sel
[j
] = nunits
+ k
;
7864 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7865 if (!use_whole_vector
.is_empty ()
7866 && use_whole_vector
[i
] != scan_store_kind_perm
)
7868 if (zero_vec
== NULL_TREE
)
7869 zero_vec
= build_zero_cst (vectype
);
7870 if (masktype
== NULL_TREE
7871 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7872 masktype
= truth_type_for (vectype
);
7873 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7876 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7879 tree vec_oprnd1
= NULL_TREE
;
7880 tree vec_oprnd2
= NULL_TREE
;
7881 tree vec_oprnd3
= NULL_TREE
;
7882 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7883 tree dataref_offset
= build_int_cst (ref_type
, 0);
7884 tree bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
,
7885 vectype
, VMAT_CONTIGUOUS
);
7886 tree ldataref_ptr
= NULL_TREE
;
7887 tree orig
= NULL_TREE
;
7888 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7889 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7890 auto_vec
<tree
> vec_oprnds1
;
7891 auto_vec
<tree
> vec_oprnds2
;
7892 auto_vec
<tree
> vec_oprnds3
;
7893 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7894 *init
, &vec_oprnds1
,
7895 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7896 rhs2
, &vec_oprnds3
);
7897 for (int j
= 0; j
< ncopies
; j
++)
7899 vec_oprnd1
= vec_oprnds1
[j
];
7900 if (ldataref_ptr
== NULL
)
7901 vec_oprnd2
= vec_oprnds2
[j
];
7902 vec_oprnd3
= vec_oprnds3
[j
];
7905 else if (!inscan_var_store
)
7906 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7910 vec_oprnd2
= make_ssa_name (vectype
);
7911 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7912 unshare_expr (ldataref_ptr
),
7914 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7915 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7916 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7917 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7918 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7921 tree v
= vec_oprnd2
;
7922 for (int i
= 0; i
< units_log2
; ++i
)
7924 tree new_temp
= make_ssa_name (vectype
);
7925 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7927 && (use_whole_vector
[i
]
7928 != scan_store_kind_perm
))
7929 ? zero_vec
: vec_oprnd1
, v
,
7931 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7932 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7933 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7935 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7937 /* Whole vector shift shifted in zero bits, but if *init
7938 is not initializer_zerop, we need to replace those elements
7939 with elements from vec_oprnd1. */
7940 tree_vector_builder
vb (masktype
, nunits
, 1);
7941 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7942 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7943 ? boolean_false_node
: boolean_true_node
);
7945 tree new_temp2
= make_ssa_name (vectype
);
7946 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7947 new_temp
, vec_oprnd1
);
7948 vect_finish_stmt_generation (vinfo
, stmt_info
,
7950 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7951 new_temp
= new_temp2
;
7954 /* For exclusive scan, perform the perms[i] permutation once
7957 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7965 tree new_temp2
= make_ssa_name (vectype
);
7966 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7967 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7968 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7973 tree new_temp
= make_ssa_name (vectype
);
7974 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7975 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7976 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7978 tree last_perm_arg
= new_temp
;
7979 /* For exclusive scan, new_temp computed above is the exclusive scan
7980 prefix sum. Turn it into inclusive prefix sum for the broadcast
7981 of the last element into orig. */
7982 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7984 last_perm_arg
= make_ssa_name (vectype
);
7985 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7986 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7987 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7990 orig
= make_ssa_name (vectype
);
7991 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7992 last_perm_arg
, perms
[units_log2
]);
7993 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7994 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7996 if (!inscan_var_store
)
7998 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7999 unshare_expr (dataref_ptr
),
8001 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8002 g
= gimple_build_assign (data_ref
, new_temp
);
8003 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8004 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8008 if (inscan_var_store
)
8009 for (int j
= 0; j
< ncopies
; j
++)
8012 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8014 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
8015 unshare_expr (dataref_ptr
),
8017 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
8018 gimple
*g
= gimple_build_assign (data_ref
, orig
);
8019 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
8020 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
8026 /* Function vectorizable_store.
8028 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8029 that can be vectorized.
8030 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8031 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8032 Return true if STMT_INFO is vectorizable in this way. */
8035 vectorizable_store (vec_info
*vinfo
,
8036 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8037 gimple
**vec_stmt
, slp_tree slp_node
,
8038 stmt_vector_for_cost
*cost_vec
)
8041 tree vec_oprnd
= NULL_TREE
;
8043 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8044 class loop
*loop
= NULL
;
8045 machine_mode vec_mode
;
8047 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
8048 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8049 tree dataref_ptr
= NULL_TREE
;
8050 tree dataref_offset
= NULL_TREE
;
8051 gimple
*ptr_incr
= NULL
;
8054 stmt_vec_info first_stmt_info
;
8056 unsigned int group_size
, i
;
8057 bool slp
= (slp_node
!= NULL
);
8058 unsigned int vec_num
;
8059 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8061 gather_scatter_info gs_info
;
8063 vec_load_store_type vls_type
;
8066 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8069 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8073 /* Is vectorizable store? */
8075 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8076 slp_tree mask_node
= NULL
;
8077 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8079 tree scalar_dest
= gimple_assign_lhs (assign
);
8080 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8081 && is_pattern_stmt_p (stmt_info
))
8082 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8083 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8084 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8085 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8086 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8087 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8088 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8089 && TREE_CODE (scalar_dest
) != MEM_REF
)
8094 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8095 if (!call
|| !gimple_call_internal_p (call
))
8098 internal_fn ifn
= gimple_call_internal_fn (call
);
8099 if (!internal_store_fn_p (ifn
))
8102 int mask_index
= internal_fn_mask_index (ifn
);
8103 if (mask_index
>= 0 && slp_node
)
8104 mask_index
= vect_slp_child_index_for_operand
8105 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8107 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8108 &mask
, &mask_node
, &mask_dt
,
8113 /* Cannot have hybrid store SLP -- that would mean storing to the
8114 same location twice. */
8115 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8117 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8118 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8122 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8123 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8128 /* Multiple types in SLP are handled by creating the appropriate number of
8129 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8134 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8136 gcc_assert (ncopies
>= 1);
8138 /* FORNOW. This restriction should be relaxed. */
8139 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
8141 if (dump_enabled_p ())
8142 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8143 "multiple types in nested loop.\n");
8149 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8150 &op
, &op_node
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8153 elem_type
= TREE_TYPE (vectype
);
8154 vec_mode
= TYPE_MODE (vectype
);
8156 if (!STMT_VINFO_DATA_REF (stmt_info
))
8159 vect_memory_access_type memory_access_type
;
8160 enum dr_alignment_support alignment_support_scheme
;
8163 internal_fn lanes_ifn
;
8164 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8165 ncopies
, &memory_access_type
, &poffset
,
8166 &alignment_support_scheme
, &misalignment
, &gs_info
,
8172 if (memory_access_type
== VMAT_CONTIGUOUS
)
8174 if (!VECTOR_MODE_P (vec_mode
)
8175 || !can_vec_mask_load_store_p (vec_mode
,
8176 TYPE_MODE (mask_vectype
), false))
8179 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8180 && (memory_access_type
!= VMAT_GATHER_SCATTER
8181 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8183 if (dump_enabled_p ())
8184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8185 "unsupported access type for masked store.\n");
8188 else if (memory_access_type
== VMAT_GATHER_SCATTER
8189 && gs_info
.ifn
== IFN_LAST
8192 if (dump_enabled_p ())
8193 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8194 "unsupported masked emulated scatter.\n");
8200 /* FORNOW. In some cases can vectorize even if data-type not supported
8201 (e.g. - array initialization with 0). */
8202 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8206 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8207 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8208 && memory_access_type
!= VMAT_GATHER_SCATTER
8209 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8212 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8213 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8214 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8218 first_stmt_info
= stmt_info
;
8219 first_dr_info
= dr_info
;
8220 group_size
= vec_num
= 1;
8223 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8225 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8226 memory_access_type
))
8230 bool costing_p
= !vec_stmt
;
8231 if (costing_p
) /* transformation not required. */
8233 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8236 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8237 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8238 vls_type
, group_size
,
8239 memory_access_type
, &gs_info
,
8243 && (!vect_maybe_update_slp_op_vectype (op_node
, vectype
)
8245 && !vect_maybe_update_slp_op_vectype (mask_node
,
8248 if (dump_enabled_p ())
8249 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8250 "incompatible vector types for invariants\n");
8254 if (dump_enabled_p ()
8255 && memory_access_type
!= VMAT_ELEMENTWISE
8256 && memory_access_type
!= VMAT_GATHER_SCATTER
8257 && alignment_support_scheme
!= dr_aligned
)
8258 dump_printf_loc (MSG_NOTE
, vect_location
,
8259 "Vectorizing an unaligned access.\n");
8261 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
8263 /* As function vect_transform_stmt shows, for interleaving stores
8264 the whole chain is vectorized when the last store in the chain
8265 is reached, the other stores in the group are skipped. So we
8266 want to only cost the last one here, but it's not trivial to
8267 get the last, as it's equivalent to use the first one for
8268 costing, use the first one instead. */
8271 && first_stmt_info
!= stmt_info
)
8274 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8278 ensure_base_align (dr_info
);
8280 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8282 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
);
8286 unsigned int inside_cost
= 0, prologue_cost
= 0;
8287 if (vls_type
== VLS_STORE_INVARIANT
)
8288 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8289 stmt_info
, 0, vect_prologue
);
8290 vect_get_store_cost (vinfo
, stmt_info
, ncopies
,
8291 alignment_support_scheme
, misalignment
,
8292 &inside_cost
, cost_vec
);
8294 if (dump_enabled_p ())
8295 dump_printf_loc (MSG_NOTE
, vect_location
,
8296 "vect_model_store_cost: inside_cost = %d, "
8297 "prologue_cost = %d .\n",
8298 inside_cost
, prologue_cost
);
8302 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8308 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8312 grouped_store
= false;
8313 /* VEC_NUM is the number of vect stmts to be created for this
8315 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8316 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8317 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8318 == first_stmt_info
);
8319 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8320 op
= vect_get_store_rhs (first_stmt_info
);
8323 /* VEC_NUM is the number of vect stmts to be created for this
8325 vec_num
= group_size
;
8327 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8330 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8332 if (!costing_p
&& dump_enabled_p ())
8333 dump_printf_loc (MSG_NOTE
, vect_location
, "transform store. ncopies = %d\n",
8336 /* Check if we need to update prologue cost for invariant,
8337 and update it accordingly if so. If it's not for
8338 interleaving store, we can just check vls_type; but if
8339 it's for interleaving store, need to check the def_type
8340 of the stored value since the current vls_type is just
8341 for first_stmt_info. */
8342 auto update_prologue_cost
= [&](unsigned *prologue_cost
, tree store_rhs
)
8344 gcc_assert (costing_p
);
8349 gcc_assert (store_rhs
);
8350 enum vect_def_type cdt
;
8351 gcc_assert (vect_is_simple_use (store_rhs
, vinfo
, &cdt
));
8352 if (cdt
!= vect_constant_def
&& cdt
!= vect_external_def
)
8355 else if (vls_type
!= VLS_STORE_INVARIANT
)
8357 *prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
,
8361 if (memory_access_type
== VMAT_ELEMENTWISE
8362 || memory_access_type
== VMAT_STRIDED_SLP
)
8364 unsigned inside_cost
= 0, prologue_cost
= 0;
8365 gimple_stmt_iterator incr_gsi
;
8371 tree stride_base
, stride_step
, alias_off
;
8372 tree vec_oprnd
= NULL_TREE
;
8375 /* Checked by get_load_store_type. */
8376 unsigned int const_nunits
= nunits
.to_constant ();
8378 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8379 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8381 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8383 = fold_build_pointer_plus
8384 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8385 size_binop (PLUS_EXPR
,
8386 convert_to_ptrofftype (dr_offset
),
8387 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8388 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8390 /* For a store with loop-invariant (but other than power-of-2)
8391 stride (i.e. not a grouped access) like so:
8393 for (i = 0; i < n; i += stride)
8396 we generate a new induction variable and new stores from
8397 the components of the (vectorized) rhs:
8399 for (j = 0; ; j += VF*stride)
8404 array[j + stride] = tmp2;
8408 unsigned nstores
= const_nunits
;
8410 tree ltype
= elem_type
;
8411 tree lvectype
= vectype
;
8414 if (group_size
< const_nunits
8415 && const_nunits
% group_size
== 0)
8417 nstores
= const_nunits
/ group_size
;
8419 ltype
= build_vector_type (elem_type
, group_size
);
8422 /* First check if vec_extract optab doesn't support extraction
8423 of vector elts directly. */
8424 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8426 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8427 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8428 group_size
).exists (&vmode
)
8429 || (convert_optab_handler (vec_extract_optab
,
8430 TYPE_MODE (vectype
), vmode
)
8431 == CODE_FOR_nothing
))
8433 /* Try to avoid emitting an extract of vector elements
8434 by performing the extracts using an integer type of the
8435 same size, extracting from a vector of those and then
8436 re-interpreting it as the original vector type if
8439 = group_size
* GET_MODE_BITSIZE (elmode
);
8440 unsigned int lnunits
= const_nunits
/ group_size
;
8441 /* If we can't construct such a vector fall back to
8442 element extracts from the original vector type and
8443 element size stores. */
8444 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8445 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8446 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8447 lnunits
).exists (&vmode
)
8448 && (convert_optab_handler (vec_extract_optab
,
8450 != CODE_FOR_nothing
))
8454 ltype
= build_nonstandard_integer_type (lsize
, 1);
8455 lvectype
= build_vector_type (ltype
, nstores
);
8457 /* Else fall back to vector extraction anyway.
8458 Fewer stores are more important than avoiding spilling
8459 of the vector we extract from. Compared to the
8460 construction case in vectorizable_load no store-forwarding
8461 issue exists here for reasonable archs. */
8464 else if (group_size
>= const_nunits
8465 && group_size
% const_nunits
== 0)
8467 int mis_align
= dr_misalignment (first_dr_info
, vectype
);
8468 dr_alignment_support dr_align
8469 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
,
8471 if (dr_align
== dr_aligned
8472 || dr_align
== dr_unaligned_supported
)
8475 lnel
= const_nunits
;
8478 alignment_support_scheme
= dr_align
;
8479 misalignment
= mis_align
;
8482 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8483 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8488 ivstep
= stride_step
;
8489 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8490 build_int_cst (TREE_TYPE (ivstep
), vf
));
8492 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8494 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8495 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8496 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
, loop
, &incr_gsi
,
8497 insert_after
, &offvar
, NULL
);
8498 incr
= gsi_stmt (incr_gsi
);
8500 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8503 alias_off
= build_int_cst (ref_type
, 0);
8504 stmt_vec_info next_stmt_info
= first_stmt_info
;
8505 auto_vec
<tree
> vec_oprnds (ncopies
);
8506 /* For costing some adjacent vector stores, we'd like to cost with
8507 the total number of them once instead of cost each one by one. */
8508 unsigned int n_adjacent_stores
= 0;
8509 for (g
= 0; g
< group_size
; g
++)
8511 running_off
= offvar
;
8516 tree size
= TYPE_SIZE_UNIT (ltype
);
8518 = fold_build2 (MULT_EXPR
, sizetype
, size_int (g
), size
);
8519 tree newoff
= copy_ssa_name (running_off
, NULL
);
8520 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8522 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8523 running_off
= newoff
;
8527 op
= vect_get_store_rhs (next_stmt_info
);
8529 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
, op
,
8532 update_prologue_cost (&prologue_cost
, op
);
8533 unsigned int group_el
= 0;
8534 unsigned HOST_WIDE_INT
8535 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8536 for (j
= 0; j
< ncopies
; j
++)
8540 vec_oprnd
= vec_oprnds
[j
];
8541 /* Pun the vector to extract from if necessary. */
8542 if (lvectype
!= vectype
)
8544 tree tem
= make_ssa_name (lvectype
);
8546 = build1 (VIEW_CONVERT_EXPR
, lvectype
, vec_oprnd
);
8547 gimple
*pun
= gimple_build_assign (tem
, cvt
);
8548 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8552 for (i
= 0; i
< nstores
; i
++)
8556 /* Only need vector extracting when there are more
8560 += record_stmt_cost (cost_vec
, 1, vec_to_scalar
,
8561 stmt_info
, 0, vect_body
);
8562 /* Take a single lane vector type store as scalar
8563 store to avoid ICE like 110776. */
8564 if (VECTOR_TYPE_P (ltype
)
8565 && known_ne (TYPE_VECTOR_SUBPARTS (ltype
), 1U))
8566 n_adjacent_stores
++;
8569 += record_stmt_cost (cost_vec
, 1, scalar_store
,
8570 stmt_info
, 0, vect_body
);
8573 tree newref
, newoff
;
8574 gimple
*incr
, *assign
;
8575 tree size
= TYPE_SIZE (ltype
);
8576 /* Extract the i'th component. */
8577 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8578 bitsize_int (i
), size
);
8579 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8582 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8586 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8588 newref
= build2 (MEM_REF
, ltype
,
8589 running_off
, this_off
);
8590 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8592 /* And store it to *running_off. */
8593 assign
= gimple_build_assign (newref
, elem
);
8594 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8598 || group_el
== group_size
)
8600 newoff
= copy_ssa_name (running_off
, NULL
);
8601 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8602 running_off
, stride_step
);
8603 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8605 running_off
= newoff
;
8608 if (g
== group_size
- 1
8611 if (j
== 0 && i
== 0)
8613 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8617 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8618 vec_oprnds
.truncate(0);
8625 if (n_adjacent_stores
> 0)
8626 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8627 alignment_support_scheme
, misalignment
,
8628 &inside_cost
, cost_vec
);
8629 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE
, vect_location
,
8631 "vect_model_store_cost: inside_cost = %d, "
8632 "prologue_cost = %d .\n",
8633 inside_cost
, prologue_cost
);
8639 gcc_assert (alignment_support_scheme
);
8640 vec_loop_masks
*loop_masks
8641 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8642 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8644 vec_loop_lens
*loop_lens
8645 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8646 ? &LOOP_VINFO_LENS (loop_vinfo
)
8649 /* Shouldn't go with length-based approach if fully masked. */
8650 gcc_assert (!loop_lens
|| !loop_masks
);
8652 /* Targets with store-lane instructions must not require explicit
8653 realignment. vect_supportable_dr_alignment always returns either
8654 dr_aligned or dr_unaligned_supported for masked operations. */
8655 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8658 || alignment_support_scheme
== dr_aligned
8659 || alignment_support_scheme
== dr_unaligned_supported
);
8661 tree offset
= NULL_TREE
;
8662 if (!known_eq (poffset
, 0))
8663 offset
= size_int (poffset
);
8666 tree vec_offset
= NULL_TREE
;
8667 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8669 aggr_type
= NULL_TREE
;
8672 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8674 aggr_type
= elem_type
;
8676 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8677 &bump
, &vec_offset
, loop_lens
);
8681 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8682 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8684 aggr_type
= vectype
;
8685 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8686 memory_access_type
, loop_lens
);
8689 if (mask
&& !costing_p
)
8690 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8692 /* In case the vectorization factor (VF) is bigger than the number
8693 of elements that we can fit in a vectype (nunits), we have to generate
8694 more than one vector stmt - i.e - we need to "unroll" the
8695 vector stmt by a factor VF/nunits. */
8697 /* In case of interleaving (non-unit grouped access):
8704 We create vectorized stores starting from base address (the access of the
8705 first stmt in the chain (S2 in the above example), when the last store stmt
8706 of the chain (S4) is reached:
8709 VS2: &base + vec_size*1 = vx0
8710 VS3: &base + vec_size*2 = vx1
8711 VS4: &base + vec_size*3 = vx3
8713 Then permutation statements are generated:
8715 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8716 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8719 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8720 (the order of the data-refs in the output of vect_permute_store_chain
8721 corresponds to the order of scalar stmts in the interleaving chain - see
8722 the documentation of vect_permute_store_chain()).
8724 In case of both multiple types and interleaving, above vector stores and
8725 permutation stmts are created for every copy. The result vector stmts are
8726 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8727 STMT_VINFO_RELATED_STMT for the next copies.
8730 auto_vec
<tree
> dr_chain (group_size
);
8731 auto_vec
<tree
> vec_masks
;
8732 tree vec_mask
= NULL
;
8733 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8734 for (i
= 0; i
< group_size
; i
++)
8735 gvec_oprnds
.quick_push (new auto_vec
<tree
> (ncopies
));
8737 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8739 gcc_assert (!slp
&& grouped_store
);
8740 unsigned inside_cost
= 0, prologue_cost
= 0;
8741 /* For costing some adjacent vector stores, we'd like to cost with
8742 the total number of them once instead of cost each one by one. */
8743 unsigned int n_adjacent_stores
= 0;
8744 for (j
= 0; j
< ncopies
; j
++)
8749 /* For interleaved stores we collect vectorized defs for all
8750 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8751 as an input to vect_permute_store_chain(). */
8752 stmt_vec_info next_stmt_info
= first_stmt_info
;
8753 for (i
= 0; i
< group_size
; i
++)
8755 /* Since gaps are not supported for interleaved stores,
8756 DR_GROUP_SIZE is the exact number of stmts in the
8757 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8758 op
= vect_get_store_rhs (next_stmt_info
);
8760 update_prologue_cost (&prologue_cost
, op
);
8763 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8766 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8767 dr_chain
.quick_push (vec_oprnd
);
8769 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8776 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8779 vec_mask
= vec_masks
[0];
8782 /* We should have caught mismatched types earlier. */
8784 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
8786 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8787 aggr_type
, NULL
, offset
, &dummy
,
8788 gsi
, &ptr_incr
, false, bump
);
8791 else if (!costing_p
)
8793 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8794 /* DR_CHAIN is then used as an input to
8795 vect_permute_store_chain(). */
8796 for (i
= 0; i
< group_size
; i
++)
8798 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8799 dr_chain
[i
] = vec_oprnd
;
8802 vec_mask
= vec_masks
[j
];
8803 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8809 n_adjacent_stores
+= vec_num
;
8813 /* Get an array into which we can store the individual vectors. */
8814 tree vec_array
= create_vector_array (vectype
, vec_num
);
8816 /* Invalidate the current contents of VEC_ARRAY. This should
8817 become an RTL clobber too, which prevents the vector registers
8818 from being upward-exposed. */
8819 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8821 /* Store the individual vectors into the array. */
8822 for (i
= 0; i
< vec_num
; i
++)
8824 vec_oprnd
= dr_chain
[i
];
8825 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8829 tree final_mask
= NULL
;
8830 tree final_len
= NULL
;
8833 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8834 ncopies
, vectype
, j
);
8836 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8839 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8842 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8843 ncopies
, vectype
, j
, 1);
8845 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8847 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8848 bias
= build_int_cst (intQI_type_node
, biasval
);
8851 mask_vectype
= truth_type_for (vectype
);
8852 final_mask
= build_minus_one_cst (mask_vectype
);
8857 if (final_len
&& final_mask
)
8860 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8861 LEN, BIAS, VEC_ARRAY). */
8862 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8863 tree alias_ptr
= build_int_cst (ref_type
, align
);
8864 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
8865 dataref_ptr
, alias_ptr
,
8866 final_mask
, final_len
, bias
,
8869 else if (final_mask
)
8872 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8874 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8875 tree alias_ptr
= build_int_cst (ref_type
, align
);
8876 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8877 dataref_ptr
, alias_ptr
,
8878 final_mask
, vec_array
);
8883 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8884 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8885 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
8886 gimple_call_set_lhs (call
, data_ref
);
8888 gimple_call_set_nothrow (call
, true);
8889 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8892 /* Record that VEC_ARRAY is now dead. */
8893 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8895 *vec_stmt
= new_stmt
;
8896 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8901 if (n_adjacent_stores
> 0)
8902 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8903 alignment_support_scheme
, misalignment
,
8904 &inside_cost
, cost_vec
);
8905 if (dump_enabled_p ())
8906 dump_printf_loc (MSG_NOTE
, vect_location
,
8907 "vect_model_store_cost: inside_cost = %d, "
8908 "prologue_cost = %d .\n",
8909 inside_cost
, prologue_cost
);
8915 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8917 gcc_assert (!grouped_store
);
8918 auto_vec
<tree
> vec_offsets
;
8919 unsigned int inside_cost
= 0, prologue_cost
= 0;
8920 for (j
= 0; j
< ncopies
; j
++)
8925 if (costing_p
&& vls_type
== VLS_STORE_INVARIANT
)
8926 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8927 stmt_info
, 0, vect_prologue
);
8928 else if (!costing_p
)
8930 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8931 DR_CHAIN is of size 1. */
8932 gcc_assert (group_size
== 1);
8934 vect_get_slp_defs (op_node
, gvec_oprnds
[0]);
8936 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
,
8937 ncopies
, op
, gvec_oprnds
[0]);
8941 vect_get_slp_defs (mask_node
, &vec_masks
);
8943 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
8949 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8950 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8952 &dataref_ptr
, &vec_offsets
);
8955 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8956 aggr_type
, NULL
, offset
,
8957 &dummy
, gsi
, &ptr_incr
, false,
8961 else if (!costing_p
)
8963 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8964 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8965 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8966 gsi
, stmt_info
, bump
);
8970 for (i
= 0; i
< vec_num
; ++i
)
8974 vec_oprnd
= (*gvec_oprnds
[0])[vec_num
* j
+ i
];
8976 vec_mask
= vec_masks
[vec_num
* j
+ i
];
8977 /* We should have caught mismatched types earlier. */
8978 gcc_assert (useless_type_conversion_p (vectype
,
8979 TREE_TYPE (vec_oprnd
)));
8981 unsigned HOST_WIDE_INT align
;
8982 tree final_mask
= NULL_TREE
;
8983 tree final_len
= NULL_TREE
;
8984 tree bias
= NULL_TREE
;
8988 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
,
8989 loop_masks
, ncopies
,
8992 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8993 final_mask
, vec_mask
, gsi
);
8996 if (gs_info
.ifn
!= IFN_LAST
)
9000 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9002 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9003 stmt_info
, 0, vect_body
);
9007 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9008 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9009 tree scale
= size_int (gs_info
.scale
);
9011 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
9014 final_len
= vect_get_loop_len (loop_vinfo
, gsi
,
9018 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9020 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9021 bias
= build_int_cst (intQI_type_node
, biasval
);
9024 mask_vectype
= truth_type_for (vectype
);
9025 final_mask
= build_minus_one_cst (mask_vectype
);
9030 if (final_len
&& final_mask
)
9031 call
= gimple_build_call_internal
9032 (IFN_MASK_LEN_SCATTER_STORE
, 7, dataref_ptr
,
9033 vec_offset
, scale
, vec_oprnd
, final_mask
,
9035 else if (final_mask
)
9036 call
= gimple_build_call_internal
9037 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
,
9038 vec_offset
, scale
, vec_oprnd
, final_mask
);
9040 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9041 dataref_ptr
, vec_offset
,
9043 gimple_call_set_nothrow (call
, true);
9044 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9047 else if (gs_info
.decl
)
9049 /* The builtin decls path for scatter is legacy, x86 only. */
9050 gcc_assert (nunits
.is_constant ()
9052 || SCALAR_INT_MODE_P
9053 (TYPE_MODE (TREE_TYPE (final_mask
)))));
9056 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9058 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9059 stmt_info
, 0, vect_body
);
9062 poly_uint64 offset_nunits
9063 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
9064 if (known_eq (nunits
, offset_nunits
))
9066 new_stmt
= vect_build_one_scatter_store_call
9067 (vinfo
, stmt_info
, gsi
, &gs_info
,
9068 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
9069 vec_oprnd
, final_mask
);
9070 vect_finish_stmt_generation (vinfo
, stmt_info
,
9073 else if (known_eq (nunits
, offset_nunits
* 2))
9075 /* We have an offset vector with half the number
9076 lanes but the builtins will store full vectype
9077 data from the lower lanes. */
9078 new_stmt
= vect_build_one_scatter_store_call
9079 (vinfo
, stmt_info
, gsi
, &gs_info
,
9081 vec_offsets
[2 * vec_num
* j
+ 2 * i
],
9082 vec_oprnd
, final_mask
);
9083 vect_finish_stmt_generation (vinfo
, stmt_info
,
9085 int count
= nunits
.to_constant ();
9086 vec_perm_builder
sel (count
, count
, 1);
9087 sel
.quick_grow (count
);
9088 for (int i
= 0; i
< count
; ++i
)
9089 sel
[i
] = i
| (count
/ 2);
9090 vec_perm_indices
indices (sel
, 2, count
);
9092 = vect_gen_perm_mask_checked (vectype
, indices
);
9093 new_stmt
= gimple_build_assign (NULL_TREE
, VEC_PERM_EXPR
,
9094 vec_oprnd
, vec_oprnd
,
9096 vec_oprnd
= make_ssa_name (vectype
);
9097 gimple_set_lhs (new_stmt
, vec_oprnd
);
9098 vect_finish_stmt_generation (vinfo
, stmt_info
,
9102 new_stmt
= gimple_build_assign (NULL_TREE
,
9105 final_mask
= make_ssa_name
9106 (truth_type_for (gs_info
.offset_vectype
));
9107 gimple_set_lhs (new_stmt
, final_mask
);
9108 vect_finish_stmt_generation (vinfo
, stmt_info
,
9111 new_stmt
= vect_build_one_scatter_store_call
9112 (vinfo
, stmt_info
, gsi
, &gs_info
,
9114 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
9115 vec_oprnd
, final_mask
);
9116 vect_finish_stmt_generation (vinfo
, stmt_info
,
9119 else if (known_eq (nunits
* 2, offset_nunits
))
9121 /* We have an offset vector with double the number
9122 lanes. Select the low/high part accordingly. */
9123 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
9124 if ((vec_num
* j
+ i
) & 1)
9126 int count
= offset_nunits
.to_constant ();
9127 vec_perm_builder
sel (count
, count
, 1);
9128 sel
.quick_grow (count
);
9129 for (int i
= 0; i
< count
; ++i
)
9130 sel
[i
] = i
| (count
/ 2);
9131 vec_perm_indices
indices (sel
, 2, count
);
9132 tree perm_mask
= vect_gen_perm_mask_checked
9133 (TREE_TYPE (vec_offset
), indices
);
9134 new_stmt
= gimple_build_assign (NULL_TREE
,
9139 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
9140 gimple_set_lhs (new_stmt
, vec_offset
);
9141 vect_finish_stmt_generation (vinfo
, stmt_info
,
9144 new_stmt
= vect_build_one_scatter_store_call
9145 (vinfo
, stmt_info
, gsi
, &gs_info
,
9146 dataref_ptr
, vec_offset
,
9147 vec_oprnd
, final_mask
);
9148 vect_finish_stmt_generation (vinfo
, stmt_info
,
9156 /* Emulated scatter. */
9157 gcc_assert (!final_mask
);
9160 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9161 /* For emulated scatter N offset vector element extracts
9162 (we assume the scalar scaling and ptr + offset add is
9163 consumed by the load). */
9165 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9166 stmt_info
, 0, vect_body
);
9167 /* N scalar stores plus extracting the elements. */
9169 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9170 stmt_info
, 0, vect_body
);
9172 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9173 stmt_info
, 0, vect_body
);
9177 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9178 unsigned HOST_WIDE_INT const_offset_nunits
9179 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9180 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9181 vec_alloc (ctor_elts
, const_nunits
);
9182 gimple_seq stmts
= NULL
;
9183 tree elt_type
= TREE_TYPE (vectype
);
9184 unsigned HOST_WIDE_INT elt_size
9185 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9186 /* We support offset vectors with more elements
9187 than the data vector for now. */
9188 unsigned HOST_WIDE_INT factor
9189 = const_offset_nunits
/ const_nunits
;
9190 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
9191 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9192 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9193 tree scale
= size_int (gs_info
.scale
);
9194 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9195 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9196 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9198 /* Compute the offsetted pointer. */
9199 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9200 bitsize_int (k
+ elt_offset
));
9202 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
9203 vec_offset
, TYPE_SIZE (idx_type
), boff
);
9204 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9205 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
,
9208 = gimple_build (&stmts
, PLUS_EXPR
,
9209 TREE_TYPE (dataref_ptr
),
9211 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9212 /* Extract the element to be stored. */
9214 = gimple_build (&stmts
, BIT_FIELD_REF
,
9215 TREE_TYPE (vectype
),
9216 vec_oprnd
, TYPE_SIZE (elt_type
),
9217 bitsize_int (k
* elt_size
));
9218 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9221 = build2 (MEM_REF
, ltype
, ptr
,
9222 build_int_cst (ref_type
, 0));
9223 new_stmt
= gimple_build_assign (ref
, elt
);
9224 vect_finish_stmt_generation (vinfo
, stmt_info
,
9228 slp_node
->push_vec_def (new_stmt
);
9231 if (!slp
&& !costing_p
)
9232 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9235 if (!slp
&& !costing_p
)
9236 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9238 if (costing_p
&& dump_enabled_p ())
9239 dump_printf_loc (MSG_NOTE
, vect_location
,
9240 "vect_model_store_cost: inside_cost = %d, "
9241 "prologue_cost = %d .\n",
9242 inside_cost
, prologue_cost
);
9247 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
9248 || memory_access_type
== VMAT_CONTIGUOUS_DOWN
9249 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
9250 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
);
9252 unsigned inside_cost
= 0, prologue_cost
= 0;
9253 /* For costing some adjacent vector stores, we'd like to cost with
9254 the total number of them once instead of cost each one by one. */
9255 unsigned int n_adjacent_stores
= 0;
9256 auto_vec
<tree
> result_chain (group_size
);
9257 auto_vec
<tree
, 1> vec_oprnds
;
9258 for (j
= 0; j
< ncopies
; j
++)
9263 if (slp
&& !costing_p
)
9265 /* Get vectorized arguments for SLP_NODE. */
9266 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9267 &vec_oprnds
, mask
, &vec_masks
);
9268 vec_oprnd
= vec_oprnds
[0];
9270 vec_mask
= vec_masks
[0];
9274 /* For interleaved stores we collect vectorized defs for all the
9275 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9276 input to vect_permute_store_chain().
9278 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9280 stmt_vec_info next_stmt_info
= first_stmt_info
;
9281 for (i
= 0; i
< group_size
; i
++)
9283 /* Since gaps are not supported for interleaved stores,
9284 DR_GROUP_SIZE is the exact number of stmts in the chain.
9285 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9286 that there is no interleaving, DR_GROUP_SIZE is 1,
9287 and only one iteration of the loop will be executed. */
9288 op
= vect_get_store_rhs (next_stmt_info
);
9290 update_prologue_cost (&prologue_cost
, op
);
9293 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
9296 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9297 dr_chain
.quick_push (vec_oprnd
);
9299 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9301 if (mask
&& !costing_p
)
9303 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9306 vec_mask
= vec_masks
[0];
9310 /* We should have caught mismatched types earlier. */
9311 gcc_assert (costing_p
9312 || useless_type_conversion_p (vectype
,
9313 TREE_TYPE (vec_oprnd
)));
9314 bool simd_lane_access_p
9315 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9317 && simd_lane_access_p
9319 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9320 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9321 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9322 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9323 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9324 get_alias_set (TREE_TYPE (ref_type
))))
9326 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9327 dataref_offset
= build_int_cst (ref_type
, 0);
9329 else if (!costing_p
)
9331 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9332 simd_lane_access_p
? loop
: NULL
,
9333 offset
, &dummy
, gsi
, &ptr_incr
,
9334 simd_lane_access_p
, bump
);
9336 else if (!costing_p
)
9338 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9339 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9340 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9342 for (i
= 0; i
< group_size
; i
++)
9344 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9345 dr_chain
[i
] = vec_oprnd
;
9348 vec_mask
= vec_masks
[j
];
9350 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9352 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9360 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
9363 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
9364 int nstmts
= ceil_log2 (group_size
) * group_size
;
9365 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
9366 stmt_info
, 0, vect_body
);
9367 if (dump_enabled_p ())
9368 dump_printf_loc (MSG_NOTE
, vect_location
,
9369 "vect_model_store_cost: "
9370 "strided group_size = %d .\n",
9374 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
9375 gsi
, &result_chain
);
9378 stmt_vec_info next_stmt_info
= first_stmt_info
;
9379 for (i
= 0; i
< vec_num
; i
++)
9384 vec_oprnd
= vec_oprnds
[i
];
9385 else if (grouped_store
)
9386 /* For grouped stores vectorized defs are interleaved in
9387 vect_permute_store_chain(). */
9388 vec_oprnd
= result_chain
[i
];
9391 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9394 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_perm
,
9395 stmt_info
, 0, vect_body
);
9398 tree perm_mask
= perm_mask_for_reverse (vectype
);
9399 tree perm_dest
= vect_create_destination_var (
9400 vect_get_store_rhs (stmt_info
), vectype
);
9401 tree new_temp
= make_ssa_name (perm_dest
);
9403 /* Generate the permute statement. */
9405 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9406 vec_oprnd
, perm_mask
);
9407 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
9410 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9411 vec_oprnd
= new_temp
;
9417 n_adjacent_stores
++;
9421 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9422 if (!next_stmt_info
)
9429 tree final_mask
= NULL_TREE
;
9430 tree final_len
= NULL_TREE
;
9431 tree bias
= NULL_TREE
;
9433 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9434 vec_num
* ncopies
, vectype
,
9436 if (slp
&& vec_mask
)
9437 vec_mask
= vec_masks
[i
];
9439 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9443 /* Bump the vector pointer. */
9444 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9448 unsigned HOST_WIDE_INT align
;
9449 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9450 if (alignment_support_scheme
== dr_aligned
)
9452 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9454 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9458 misalign
= misalignment
;
9459 if (dataref_offset
== NULL_TREE
9460 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9461 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9463 align
= least_bit_hwi (misalign
| align
);
9465 /* Compute IFN when LOOP_LENS or final_mask valid. */
9466 machine_mode vmode
= TYPE_MODE (vectype
);
9467 machine_mode new_vmode
= vmode
;
9468 internal_fn partial_ifn
= IFN_LAST
;
9471 opt_machine_mode new_ovmode
9472 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9473 new_vmode
= new_ovmode
.require ();
9475 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9476 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9477 vec_num
* ncopies
, vectype
,
9478 vec_num
* j
+ i
, factor
);
9480 else if (final_mask
)
9482 if (!can_vec_mask_load_store_p (
9483 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9488 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9492 /* Pass VF value to 'len' argument of
9493 MASK_LEN_STORE if LOOP_LENS is invalid. */
9494 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9498 /* Pass all ones value to 'mask' argument of
9499 MASK_LEN_STORE if final_mask is invalid. */
9500 mask_vectype
= truth_type_for (vectype
);
9501 final_mask
= build_minus_one_cst (mask_vectype
);
9507 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9509 bias
= build_int_cst (intQI_type_node
, biasval
);
9512 /* Arguments are ready. Create the new vector stmt. */
9516 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9517 /* Need conversion if it's wrapped with VnQI. */
9518 if (vmode
!= new_vmode
)
9521 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9523 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9524 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9526 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9527 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9531 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9532 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9533 dataref_ptr
, ptr
, final_mask
,
9534 final_len
, bias
, vec_oprnd
);
9536 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9537 dataref_ptr
, ptr
, final_len
,
9539 gimple_call_set_nothrow (call
, true);
9540 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9543 else if (final_mask
)
9545 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9547 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9548 ptr
, final_mask
, vec_oprnd
);
9549 gimple_call_set_nothrow (call
, true);
9550 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9556 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9557 dataref_offset
? dataref_offset
9558 : build_int_cst (ref_type
, 0));
9559 if (alignment_support_scheme
== dr_aligned
)
9562 TREE_TYPE (data_ref
)
9563 = build_aligned_type (TREE_TYPE (data_ref
),
9564 align
* BITS_PER_UNIT
);
9565 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9566 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9567 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9573 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9574 if (!next_stmt_info
)
9577 if (!slp
&& !costing_p
)
9580 *vec_stmt
= new_stmt
;
9581 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9587 if (n_adjacent_stores
> 0)
9588 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9589 alignment_support_scheme
, misalignment
,
9590 &inside_cost
, cost_vec
);
9592 /* When vectorizing a store into the function result assign
9593 a penalty if the function returns in a multi-register location.
9594 In this case we assume we'll end up with having to spill the
9595 vector result and do piecewise loads as a conservative estimate. */
9596 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
9598 && (TREE_CODE (base
) == RESULT_DECL
9599 || (DECL_P (base
) && cfun_returns (base
)))
9600 && !aggregate_value_p (base
, cfun
->decl
))
9602 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
9603 /* ??? Handle PARALLEL in some way. */
9606 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
9607 /* Assume that a single reg-reg move is possible and cheap,
9608 do not account for vector to gp register move cost. */
9613 += record_stmt_cost (cost_vec
, ncopies
, vector_store
,
9614 stmt_info
, 0, vect_epilogue
);
9617 += record_stmt_cost (cost_vec
, ncopies
* nregs
, scalar_load
,
9618 stmt_info
, 0, vect_epilogue
);
9622 if (dump_enabled_p ())
9623 dump_printf_loc (MSG_NOTE
, vect_location
,
9624 "vect_model_store_cost: inside_cost = %d, "
9625 "prologue_cost = %d .\n",
9626 inside_cost
, prologue_cost
);
9632 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9633 VECTOR_CST mask. No checks are made that the target platform supports the
9634 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9635 vect_gen_perm_mask_checked. */
9638 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
9642 poly_uint64 nunits
= sel
.length ();
9643 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
9645 mask_type
= build_vector_type (ssizetype
, nunits
);
9646 return vec_perm_indices_to_tree (mask_type
, sel
);
9649 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9650 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9653 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
9655 machine_mode vmode
= TYPE_MODE (vectype
);
9656 gcc_assert (can_vec_perm_const_p (vmode
, vmode
, sel
));
9657 return vect_gen_perm_mask_any (vectype
, sel
);
9660 /* Given a vector variable X and Y, that was generated for the scalar
9661 STMT_INFO, generate instructions to permute the vector elements of X and Y
9662 using permutation mask MASK_VEC, insert them at *GSI and return the
9663 permuted vector variable. */
9666 permute_vec_elements (vec_info
*vinfo
,
9667 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
9668 gimple_stmt_iterator
*gsi
)
9670 tree vectype
= TREE_TYPE (x
);
9671 tree perm_dest
, data_ref
;
9674 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
9675 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
9676 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9678 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
9679 data_ref
= make_ssa_name (perm_dest
);
9681 /* Generate the permute statement. */
9682 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
9683 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
9688 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9689 inserting them on the loops preheader edge. Returns true if we
9690 were successful in doing so (and thus STMT_INFO can be moved then),
9691 otherwise returns false. HOIST_P indicates if we want to hoist the
9692 definitions of all SSA uses, it would be false when we are costing. */
9695 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
, bool hoist_p
)
9701 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9703 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
9704 if (!gimple_nop_p (def_stmt
)
9705 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9707 /* Make sure we don't need to recurse. While we could do
9708 so in simple cases when there are more complex use webs
9709 we don't have an easy way to preserve stmt order to fulfil
9710 dependencies within them. */
9713 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
9715 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
9717 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
9718 if (!gimple_nop_p (def_stmt2
)
9719 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
9732 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
9734 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
9735 if (!gimple_nop_p (def_stmt
)
9736 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
9738 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
9739 gsi_remove (&gsi
, false);
9740 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
9747 /* vectorizable_load.
9749 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9750 that can be vectorized.
9751 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9752 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9753 Return true if STMT_INFO is vectorizable in this way. */
9756 vectorizable_load (vec_info
*vinfo
,
9757 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9758 gimple
**vec_stmt
, slp_tree slp_node
,
9759 stmt_vector_for_cost
*cost_vec
)
9762 tree vec_dest
= NULL
;
9763 tree data_ref
= NULL
;
9764 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9765 class loop
*loop
= NULL
;
9766 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
9767 bool nested_in_vect_loop
= false;
9769 /* Avoid false positive uninitialized warning, see PR110652. */
9770 tree new_temp
= NULL_TREE
;
9773 tree dataref_ptr
= NULL_TREE
;
9774 tree dataref_offset
= NULL_TREE
;
9775 gimple
*ptr_incr
= NULL
;
9778 unsigned int group_size
;
9779 poly_uint64 group_gap_adj
;
9780 tree msq
= NULL_TREE
, lsq
;
9781 tree realignment_token
= NULL_TREE
;
9783 vec
<tree
> dr_chain
= vNULL
;
9784 bool grouped_load
= false;
9785 stmt_vec_info first_stmt_info
;
9786 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
9787 bool compute_in_loop
= false;
9788 class loop
*at_loop
;
9790 bool slp
= (slp_node
!= NULL
);
9791 bool slp_perm
= false;
9792 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9795 gather_scatter_info gs_info
;
9797 enum vect_def_type mask_dt
= vect_unknown_def_type
;
9799 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9802 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9806 if (!STMT_VINFO_DATA_REF (stmt_info
))
9809 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
9810 int mask_index
= -1;
9811 slp_tree slp_op
= NULL
;
9812 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
9814 scalar_dest
= gimple_assign_lhs (assign
);
9815 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
9818 tree_code code
= gimple_assign_rhs_code (assign
);
9819 if (code
!= ARRAY_REF
9820 && code
!= BIT_FIELD_REF
9821 && code
!= INDIRECT_REF
9822 && code
!= COMPONENT_REF
9823 && code
!= IMAGPART_EXPR
9824 && code
!= REALPART_EXPR
9826 && TREE_CODE_CLASS (code
) != tcc_declaration
)
9831 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9832 if (!call
|| !gimple_call_internal_p (call
))
9835 internal_fn ifn
= gimple_call_internal_fn (call
);
9836 if (!internal_load_fn_p (ifn
))
9839 scalar_dest
= gimple_call_lhs (call
);
9843 mask_index
= internal_fn_mask_index (ifn
);
9844 if (mask_index
>= 0 && slp_node
)
9845 mask_index
= vect_slp_child_index_for_operand
9846 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9848 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
9849 &mask
, &slp_op
, &mask_dt
, &mask_vectype
))
9853 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9854 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9858 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
9859 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
9860 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
9865 /* Multiple types in SLP are handled by creating the appropriate number of
9866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9871 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9873 gcc_assert (ncopies
>= 1);
9875 /* FORNOW. This restriction should be relaxed. */
9876 if (nested_in_vect_loop
&& ncopies
> 1)
9878 if (dump_enabled_p ())
9879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9880 "multiple types in nested loop.\n");
9884 /* Invalidate assumptions made by dependence analysis when vectorization
9885 on the unrolled body effectively re-orders stmts. */
9887 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9888 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9889 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9891 if (dump_enabled_p ())
9892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9893 "cannot perform implicit CSE when unrolling "
9894 "with negative dependence distance\n");
9898 elem_type
= TREE_TYPE (vectype
);
9899 mode
= TYPE_MODE (vectype
);
9901 /* FORNOW. In some cases can vectorize even if data-type not supported
9902 (e.g. - data copies). */
9903 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9905 if (dump_enabled_p ())
9906 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9907 "Aligned load, but unsupported type.\n");
9911 /* Check if the load is a part of an interleaving chain. */
9912 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9914 grouped_load
= true;
9916 gcc_assert (!nested_in_vect_loop
);
9917 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9919 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9920 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9922 /* Refuse non-SLP vectorization of SLP-only groups. */
9923 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9925 if (dump_enabled_p ())
9926 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9927 "cannot vectorize load in non-SLP mode.\n");
9931 /* Invalidate assumptions made by dependence analysis when vectorization
9932 on the unrolled body effectively re-orders stmts. */
9933 if (!PURE_SLP_STMT (stmt_info
)
9934 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9935 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9936 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9938 if (dump_enabled_p ())
9939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9940 "cannot perform implicit CSE when performing "
9941 "group loads with negative dependence distance\n");
9948 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9954 /* In BB vectorization we may not actually use a loaded vector
9955 accessing elements in excess of DR_GROUP_SIZE. */
9956 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9957 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
9958 unsigned HOST_WIDE_INT nunits
;
9959 unsigned j
, k
, maxk
= 0;
9960 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
9963 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
9964 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
9965 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
9967 if (dump_enabled_p ())
9968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9969 "BB vectorization with gaps at the end of "
9970 "a load is not supported\n");
9977 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
9980 if (dump_enabled_p ())
9981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
9983 "unsupported load permutation\n");
9988 vect_memory_access_type memory_access_type
;
9989 enum dr_alignment_support alignment_support_scheme
;
9992 internal_fn lanes_ifn
;
9993 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
9994 ncopies
, &memory_access_type
, &poffset
,
9995 &alignment_support_scheme
, &misalignment
, &gs_info
,
10001 if (memory_access_type
== VMAT_CONTIGUOUS
)
10003 machine_mode vec_mode
= TYPE_MODE (vectype
);
10004 if (!VECTOR_MODE_P (vec_mode
)
10005 || !can_vec_mask_load_store_p (vec_mode
,
10006 TYPE_MODE (mask_vectype
), true))
10009 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
10010 && memory_access_type
!= VMAT_GATHER_SCATTER
)
10012 if (dump_enabled_p ())
10013 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10014 "unsupported access type for masked load.\n");
10017 else if (memory_access_type
== VMAT_GATHER_SCATTER
10018 && gs_info
.ifn
== IFN_LAST
10021 if (dump_enabled_p ())
10022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10023 "unsupported masked emulated gather.\n");
10028 bool costing_p
= !vec_stmt
;
10030 if (costing_p
) /* transformation not required. */
10034 && !vect_maybe_update_slp_op_vectype (slp_op
,
10037 if (dump_enabled_p ())
10038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10039 "incompatible vector types for invariants\n");
10044 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
10047 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10048 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
10049 VLS_LOAD
, group_size
,
10050 memory_access_type
, &gs_info
,
10053 if (dump_enabled_p ()
10054 && memory_access_type
!= VMAT_ELEMENTWISE
10055 && memory_access_type
!= VMAT_GATHER_SCATTER
10056 && alignment_support_scheme
!= dr_aligned
)
10057 dump_printf_loc (MSG_NOTE
, vect_location
,
10058 "Vectorizing an unaligned access.\n");
10060 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10061 vinfo
->any_known_not_updated_vssa
= true;
10063 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
10067 gcc_assert (memory_access_type
10068 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
10070 if (dump_enabled_p () && !costing_p
)
10071 dump_printf_loc (MSG_NOTE
, vect_location
,
10072 "transform load. ncopies = %d\n", ncopies
);
10076 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
10077 ensure_base_align (dr_info
);
10079 if (memory_access_type
== VMAT_INVARIANT
)
10081 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
10082 /* If we have versioned for aliasing or the loop doesn't
10083 have any data dependencies that would preclude this,
10084 then we are sure this is a loop invariant load and
10085 thus we can insert it on the preheader edge.
10086 TODO: hoist_defs_of_uses should ideally be computed
10087 once at analysis time, remembered and used in the
10089 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
10090 && !nested_in_vect_loop
10091 && hoist_defs_of_uses (stmt_info
, loop
, !costing_p
));
10094 enum vect_cost_model_location cost_loc
10095 = hoist_p
? vect_prologue
: vect_body
;
10096 unsigned int cost
= record_stmt_cost (cost_vec
, 1, scalar_load
,
10097 stmt_info
, 0, cost_loc
);
10098 cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
, 0,
10100 unsigned int prologue_cost
= hoist_p
? cost
: 0;
10101 unsigned int inside_cost
= hoist_p
? 0 : cost
;
10102 if (dump_enabled_p ())
10103 dump_printf_loc (MSG_NOTE
, vect_location
,
10104 "vect_model_load_cost: inside_cost = %d, "
10105 "prologue_cost = %d .\n",
10106 inside_cost
, prologue_cost
);
10111 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
10112 if (dump_enabled_p ())
10113 dump_printf_loc (MSG_NOTE
, vect_location
,
10114 "hoisting out of the vectorized loop: %G",
10116 scalar_dest
= copy_ssa_name (scalar_dest
);
10117 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
10118 edge pe
= loop_preheader_edge (loop
);
10119 gphi
*vphi
= get_virtual_phi (loop
->header
);
10122 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
10124 vuse
= gimple_vuse (gsi_stmt (*gsi
));
10125 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
10126 gimple_set_vuse (new_stmt
, vuse
);
10127 gsi_insert_on_edge_immediate (pe
, new_stmt
);
10129 /* These copies are all equivalent. */
10131 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10135 gimple_stmt_iterator gsi2
= *gsi
;
10137 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
10140 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10142 for (j
= 0; j
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
); ++j
)
10143 slp_node
->push_vec_def (new_stmt
);
10146 for (j
= 0; j
< ncopies
; ++j
)
10147 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10148 *vec_stmt
= new_stmt
;
10153 if (memory_access_type
== VMAT_ELEMENTWISE
10154 || memory_access_type
== VMAT_STRIDED_SLP
)
10156 gimple_stmt_iterator incr_gsi
;
10161 vec
<constructor_elt
, va_gc
> *v
= NULL
;
10162 tree stride_base
, stride_step
, alias_off
;
10163 /* Checked by get_load_store_type. */
10164 unsigned int const_nunits
= nunits
.to_constant ();
10165 unsigned HOST_WIDE_INT cst_offset
= 0;
10167 unsigned int inside_cost
= 0;
10169 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
10170 gcc_assert (!nested_in_vect_loop
);
10174 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10175 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10179 first_stmt_info
= stmt_info
;
10180 first_dr_info
= dr_info
;
10183 if (slp
&& grouped_load
)
10185 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10186 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10192 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
10193 * vect_get_place_in_interleaving_chain (stmt_info
,
10196 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
10201 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
10202 stride_base
= fold_build_pointer_plus (
10203 DR_BASE_ADDRESS (first_dr_info
->dr
),
10204 size_binop (PLUS_EXPR
, convert_to_ptrofftype (dr_offset
),
10205 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
10206 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
10208 /* For a load with loop-invariant (but other than power-of-2)
10209 stride (i.e. not a grouped access) like so:
10211 for (i = 0; i < n; i += stride)
10214 we generate a new induction variable and new accesses to
10215 form a new vector (or vectors, depending on ncopies):
10217 for (j = 0; ; j += VF*stride)
10219 tmp2 = array[j + stride];
10221 vectemp = {tmp1, tmp2, ...}
10224 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
10225 build_int_cst (TREE_TYPE (stride_step
), vf
));
10227 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
10229 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
10230 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
10231 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
10232 loop
, &incr_gsi
, insert_after
,
10235 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
10238 running_off
= offvar
;
10239 alias_off
= build_int_cst (ref_type
, 0);
10240 int nloads
= const_nunits
;
10242 tree ltype
= TREE_TYPE (vectype
);
10243 tree lvectype
= vectype
;
10244 auto_vec
<tree
> dr_chain
;
10245 if (memory_access_type
== VMAT_STRIDED_SLP
)
10247 if (group_size
< const_nunits
)
10249 /* First check if vec_init optab supports construction from vector
10250 elts directly. Otherwise avoid emitting a constructor of
10251 vector elements by performing the loads using an integer type
10252 of the same size, constructing a vector of those and then
10253 re-interpreting it as the original vector type. This avoids a
10254 huge runtime penalty due to the general inability to perform
10255 store forwarding from smaller stores to a larger load. */
10258 = vector_vector_composition_type (vectype
,
10259 const_nunits
/ group_size
,
10261 if (vtype
!= NULL_TREE
)
10263 nloads
= const_nunits
/ group_size
;
10272 lnel
= const_nunits
;
10275 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10277 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10278 else if (nloads
== 1)
10283 /* For SLP permutation support we need to load the whole group,
10284 not only the number of vector stmts the permutation result
10288 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10290 unsigned int const_vf
= vf
.to_constant ();
10291 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10292 dr_chain
.create (ncopies
);
10295 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10297 unsigned int group_el
= 0;
10298 unsigned HOST_WIDE_INT
10299 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10300 unsigned int n_groups
= 0;
10301 /* For costing some adjacent vector loads, we'd like to cost with
10302 the total number of them once instead of cost each one by one. */
10303 unsigned int n_adjacent_loads
= 0;
10304 for (j
= 0; j
< ncopies
; j
++)
10306 if (nloads
> 1 && !costing_p
)
10307 vec_alloc (v
, nloads
);
10308 gimple
*new_stmt
= NULL
;
10309 for (i
= 0; i
< nloads
; i
++)
10313 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10314 avoid ICE, see PR110776. */
10315 if (VECTOR_TYPE_P (ltype
)
10316 && memory_access_type
!= VMAT_ELEMENTWISE
)
10317 n_adjacent_loads
++;
10319 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10320 stmt_info
, 0, vect_body
);
10323 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10324 group_el
* elsz
+ cst_offset
);
10325 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10326 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10327 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10328 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10330 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10331 gimple_assign_lhs (new_stmt
));
10335 || group_el
== group_size
)
10338 /* When doing SLP make sure to not load elements from
10339 the next vector iteration, those will not be accessed
10340 so just use the last element again. See PR107451. */
10341 if (!slp
|| known_lt (n_groups
, vf
))
10343 tree newoff
= copy_ssa_name (running_off
);
10345 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10346 running_off
, stride_step
);
10347 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10348 running_off
= newoff
;
10357 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10358 stmt_info
, 0, vect_body
);
10361 tree vec_inv
= build_constructor (lvectype
, v
);
10362 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10364 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10365 if (lvectype
!= vectype
)
10368 = gimple_build_assign (make_ssa_name (vectype
),
10370 build1 (VIEW_CONVERT_EXPR
,
10371 vectype
, new_temp
));
10372 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10383 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10385 slp_node
->push_vec_def (new_stmt
);
10390 *vec_stmt
= new_stmt
;
10391 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10401 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10402 true, &n_perms
, &n_loads
);
10403 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10404 first_stmt_info
, 0, vect_body
);
10407 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10413 if (n_adjacent_loads
> 0)
10414 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10415 alignment_support_scheme
, misalignment
, false,
10416 &inside_cost
, nullptr, cost_vec
, cost_vec
,
10418 if (dump_enabled_p ())
10419 dump_printf_loc (MSG_NOTE
, vect_location
,
10420 "vect_model_load_cost: inside_cost = %u, "
10421 "prologue_cost = 0 .\n",
10428 if (memory_access_type
== VMAT_GATHER_SCATTER
10429 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10430 grouped_load
= false;
10433 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10437 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10438 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10442 first_stmt_info
= stmt_info
;
10445 /* For SLP vectorization we directly vectorize a subchain
10446 without permutation. */
10447 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10448 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10449 /* For BB vectorization always use the first stmt to base
10450 the data ref pointer on. */
10452 first_stmt_info_for_drptr
10453 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10455 /* Check if the chain of loads is already vectorized. */
10456 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
10457 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10458 ??? But we can only do so if there is exactly one
10459 as we have no way to get at the rest. Leave the CSE
10461 ??? With the group load eventually participating
10462 in multiple different permutations (having multiple
10463 slp nodes which refer to the same group) the CSE
10464 is even wrong code. See PR56270. */
10467 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10470 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10473 /* VEC_NUM is the number of vect stmts to be created for this group. */
10476 grouped_load
= false;
10477 /* If an SLP permutation is from N elements to N elements,
10478 and if one vector holds a whole number of N, we can load
10479 the inputs to the permutation in the same way as an
10480 unpermuted sequence. In other cases we need to load the
10481 whole group, not only the number of vector stmts the
10482 permutation result fits in. */
10483 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10485 && (group_size
!= scalar_lanes
10486 || !multiple_p (nunits
, group_size
)))
10488 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10489 variable VF; see vect_transform_slp_perm_load. */
10490 unsigned int const_vf
= vf
.to_constant ();
10491 unsigned int const_nunits
= nunits
.to_constant ();
10492 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10493 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10497 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10499 = group_size
- scalar_lanes
;
10503 vec_num
= group_size
;
10505 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10509 first_stmt_info
= stmt_info
;
10510 first_dr_info
= dr_info
;
10511 group_size
= vec_num
= 1;
10513 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10515 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10518 gcc_assert (alignment_support_scheme
);
10519 vec_loop_masks
*loop_masks
10520 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10521 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10523 vec_loop_lens
*loop_lens
10524 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10525 ? &LOOP_VINFO_LENS (loop_vinfo
)
10528 /* Shouldn't go with length-based approach if fully masked. */
10529 gcc_assert (!loop_lens
|| !loop_masks
);
10531 /* Targets with store-lane instructions must not require explicit
10532 realignment. vect_supportable_dr_alignment always returns either
10533 dr_aligned or dr_unaligned_supported for masked operations. */
10534 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
10537 || alignment_support_scheme
== dr_aligned
10538 || alignment_support_scheme
== dr_unaligned_supported
);
10540 /* In case the vectorization factor (VF) is bigger than the number
10541 of elements that we can fit in a vectype (nunits), we have to generate
10542 more than one vector stmt - i.e - we need to "unroll" the
10543 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10544 from one copy of the vector stmt to the next, in the field
10545 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10546 stages to find the correct vector defs to be used when vectorizing
10547 stmts that use the defs of the current stmt. The example below
10548 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10549 need to create 4 vectorized stmts):
10551 before vectorization:
10552 RELATED_STMT VEC_STMT
10556 step 1: vectorize stmt S1:
10557 We first create the vector stmt VS1_0, and, as usual, record a
10558 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10559 Next, we create the vector stmt VS1_1, and record a pointer to
10560 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10561 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10562 stmts and pointers:
10563 RELATED_STMT VEC_STMT
10564 VS1_0: vx0 = memref0 VS1_1 -
10565 VS1_1: vx1 = memref1 VS1_2 -
10566 VS1_2: vx2 = memref2 VS1_3 -
10567 VS1_3: vx3 = memref3 - -
10568 S1: x = load - VS1_0
10572 /* In case of interleaving (non-unit grouped access):
10579 Vectorized loads are created in the order of memory accesses
10580 starting from the access of the first stmt of the chain:
10583 VS2: vx1 = &base + vec_size*1
10584 VS3: vx3 = &base + vec_size*2
10585 VS4: vx4 = &base + vec_size*3
10587 Then permutation statements are generated:
10589 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10590 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10593 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10594 (the order of the data-refs in the output of vect_permute_load_chain
10595 corresponds to the order of scalar stmts in the interleaving chain - see
10596 the documentation of vect_permute_load_chain()).
10597 The generation of permutation stmts and recording them in
10598 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10600 In case of both multiple types and interleaving, the vector loads and
10601 permutation stmts above are created for every copy. The result vector
10602 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10603 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10605 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10606 on a target that supports unaligned accesses (dr_unaligned_supported)
10607 we generate the following code:
10611 p = p + indx * vectype_size;
10616 Otherwise, the data reference is potentially unaligned on a target that
10617 does not support unaligned accesses (dr_explicit_realign_optimized) -
10618 then generate the following code, in which the data in each iteration is
10619 obtained by two vector loads, one from the previous iteration, and one
10620 from the current iteration:
10622 msq_init = *(floor(p1))
10623 p2 = initial_addr + VS - 1;
10624 realignment_token = call target_builtin;
10627 p2 = p2 + indx * vectype_size
10629 vec_dest = realign_load (msq, lsq, realignment_token)
10634 /* If the misalignment remains the same throughout the execution of the
10635 loop, we can create the init_addr and permutation mask at the loop
10636 preheader. Otherwise, it needs to be created inside the loop.
10637 This can only occur when vectorizing memory accesses in the inner-loop
10638 nested within an outer-loop that is being vectorized. */
10640 if (nested_in_vect_loop
10641 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10642 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10644 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10645 compute_in_loop
= true;
10648 bool diff_first_stmt_info
10649 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10651 tree offset
= NULL_TREE
;
10652 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10653 || alignment_support_scheme
== dr_explicit_realign
)
10654 && !compute_in_loop
)
10656 /* If we have different first_stmt_info, we can't set up realignment
10657 here, since we can't guarantee first_stmt_info DR has been
10658 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10659 distance from first_stmt_info DR instead as below. */
10662 if (!diff_first_stmt_info
)
10663 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10664 &realignment_token
,
10665 alignment_support_scheme
, NULL_TREE
,
10667 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10669 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10670 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10672 gcc_assert (!first_stmt_info_for_drptr
);
10679 if (!known_eq (poffset
, 0))
10681 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10682 : size_int (poffset
));
10685 tree vec_offset
= NULL_TREE
;
10686 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10688 aggr_type
= NULL_TREE
;
10691 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10693 aggr_type
= elem_type
;
10695 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10696 &bump
, &vec_offset
, loop_lens
);
10700 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10701 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10703 aggr_type
= vectype
;
10704 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10705 memory_access_type
, loop_lens
);
10708 auto_vec
<tree
> vec_offsets
;
10709 auto_vec
<tree
> vec_masks
;
10710 if (mask
&& !costing_p
)
10713 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10716 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10717 &vec_masks
, mask_vectype
);
10720 tree vec_mask
= NULL_TREE
;
10721 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10723 gcc_assert (alignment_support_scheme
== dr_aligned
10724 || alignment_support_scheme
== dr_unaligned_supported
);
10725 gcc_assert (grouped_load
&& !slp
);
10727 unsigned int inside_cost
= 0, prologue_cost
= 0;
10728 /* For costing some adjacent vector loads, we'd like to cost with
10729 the total number of them once instead of cost each one by one. */
10730 unsigned int n_adjacent_loads
= 0;
10731 for (j
= 0; j
< ncopies
; j
++)
10735 /* An IFN_LOAD_LANES will load all its vector results,
10736 regardless of which ones we actually need. Account
10737 for the cost of unused results. */
10738 if (first_stmt_info
== stmt_info
)
10740 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10741 stmt_vec_info next_stmt_info
= first_stmt_info
;
10745 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10747 while (next_stmt_info
);
10750 if (dump_enabled_p ())
10751 dump_printf_loc (MSG_NOTE
, vect_location
,
10752 "vect_model_load_cost: %d "
10753 "unused vectors.\n",
10755 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10756 alignment_support_scheme
,
10757 misalignment
, false, &inside_cost
,
10758 &prologue_cost
, cost_vec
, cost_vec
,
10762 n_adjacent_loads
++;
10766 /* 1. Create the vector or array pointer update chain. */
10769 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10770 at_loop
, offset
, &dummy
, gsi
,
10771 &ptr_incr
, false, bump
);
10774 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10775 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10779 vec_mask
= vec_masks
[j
];
10781 tree vec_array
= create_vector_array (vectype
, vec_num
);
10783 tree final_mask
= NULL_TREE
;
10784 tree final_len
= NULL_TREE
;
10785 tree bias
= NULL_TREE
;
10787 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10788 ncopies
, vectype
, j
);
10790 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10793 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10796 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10797 ncopies
, vectype
, j
, 1);
10799 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10800 signed char biasval
10801 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10802 bias
= build_int_cst (intQI_type_node
, biasval
);
10805 mask_vectype
= truth_type_for (vectype
);
10806 final_mask
= build_minus_one_cst (mask_vectype
);
10811 if (final_len
&& final_mask
)
10814 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10815 VEC_MASK, LEN, BIAS). */
10816 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10817 tree alias_ptr
= build_int_cst (ref_type
, align
);
10818 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10819 dataref_ptr
, alias_ptr
,
10820 final_mask
, final_len
, bias
);
10822 else if (final_mask
)
10825 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10827 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10828 tree alias_ptr
= build_int_cst (ref_type
, align
);
10829 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10830 dataref_ptr
, alias_ptr
,
10836 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10837 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10838 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10840 gimple_call_set_lhs (call
, vec_array
);
10841 gimple_call_set_nothrow (call
, true);
10842 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10844 dr_chain
.create (vec_num
);
10845 /* Extract each vector into an SSA_NAME. */
10846 for (i
= 0; i
< vec_num
; i
++)
10848 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10850 dr_chain
.quick_push (new_temp
);
10853 /* Record the mapping between SSA_NAMEs and statements. */
10854 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10856 /* Record that VEC_ARRAY is now dead. */
10857 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10859 dr_chain
.release ();
10861 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10866 if (n_adjacent_loads
> 0)
10867 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10868 alignment_support_scheme
, misalignment
, false,
10869 &inside_cost
, &prologue_cost
, cost_vec
,
10871 if (dump_enabled_p ())
10872 dump_printf_loc (MSG_NOTE
, vect_location
,
10873 "vect_model_load_cost: inside_cost = %u, "
10874 "prologue_cost = %u .\n",
10875 inside_cost
, prologue_cost
);
10881 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10883 gcc_assert (alignment_support_scheme
== dr_aligned
10884 || alignment_support_scheme
== dr_unaligned_supported
);
10885 gcc_assert (!grouped_load
&& !slp_perm
);
10887 unsigned int inside_cost
= 0, prologue_cost
= 0;
10888 for (j
= 0; j
< ncopies
; j
++)
10890 /* 1. Create the vector or array pointer update chain. */
10891 if (j
== 0 && !costing_p
)
10893 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10894 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10895 slp_node
, &gs_info
, &dataref_ptr
,
10899 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10900 at_loop
, offset
, &dummy
, gsi
,
10901 &ptr_incr
, false, bump
);
10903 else if (!costing_p
)
10905 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10906 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10907 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10908 gsi
, stmt_info
, bump
);
10911 gimple
*new_stmt
= NULL
;
10912 for (i
= 0; i
< vec_num
; i
++)
10914 tree final_mask
= NULL_TREE
;
10915 tree final_len
= NULL_TREE
;
10916 tree bias
= NULL_TREE
;
10920 vec_mask
= vec_masks
[vec_num
* j
+ i
];
10923 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10924 vec_num
* ncopies
, vectype
,
10927 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
10928 final_mask
, vec_mask
, gsi
);
10930 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10931 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10932 gsi
, stmt_info
, bump
);
10935 /* 2. Create the vector-load in the loop. */
10936 unsigned HOST_WIDE_INT align
;
10937 if (gs_info
.ifn
!= IFN_LAST
)
10941 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
10943 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
10944 stmt_info
, 0, vect_body
);
10947 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10948 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
10949 tree zero
= build_zero_cst (vectype
);
10950 tree scale
= size_int (gs_info
.scale
);
10952 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
10956 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10957 vec_num
* ncopies
, vectype
,
10958 vec_num
* j
+ i
, 1);
10961 = build_int_cst (sizetype
,
10962 TYPE_VECTOR_SUBPARTS (vectype
));
10963 signed char biasval
10964 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10965 bias
= build_int_cst (intQI_type_node
, biasval
);
10968 mask_vectype
= truth_type_for (vectype
);
10969 final_mask
= build_minus_one_cst (mask_vectype
);
10974 if (final_len
&& final_mask
)
10976 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
10977 dataref_ptr
, vec_offset
,
10978 scale
, zero
, final_mask
,
10980 else if (final_mask
)
10981 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
10982 dataref_ptr
, vec_offset
,
10983 scale
, zero
, final_mask
);
10985 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
10986 dataref_ptr
, vec_offset
,
10988 gimple_call_set_nothrow (call
, true);
10990 data_ref
= NULL_TREE
;
10992 else if (gs_info
.decl
)
10994 /* The builtin decls path for gather is legacy, x86 only. */
10995 gcc_assert (!final_len
&& nunits
.is_constant ());
10998 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11000 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11001 stmt_info
, 0, vect_body
);
11004 poly_uint64 offset_nunits
11005 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
11006 if (known_eq (nunits
, offset_nunits
))
11008 new_stmt
= vect_build_one_gather_load_call
11009 (vinfo
, stmt_info
, gsi
, &gs_info
,
11010 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
11012 data_ref
= NULL_TREE
;
11014 else if (known_eq (nunits
, offset_nunits
* 2))
11016 /* We have an offset vector with half the number of
11017 lanes but the builtins will produce full vectype
11018 data with just the lower lanes filled. */
11019 new_stmt
= vect_build_one_gather_load_call
11020 (vinfo
, stmt_info
, gsi
, &gs_info
,
11021 dataref_ptr
, vec_offsets
[2 * vec_num
* j
+ 2 * i
],
11023 tree low
= make_ssa_name (vectype
);
11024 gimple_set_lhs (new_stmt
, low
);
11025 vect_finish_stmt_generation (vinfo
, stmt_info
,
11028 /* now put upper half of final_mask in final_mask low. */
11030 && !SCALAR_INT_MODE_P
11031 (TYPE_MODE (TREE_TYPE (final_mask
))))
11033 int count
= nunits
.to_constant ();
11034 vec_perm_builder
sel (count
, count
, 1);
11035 sel
.quick_grow (count
);
11036 for (int i
= 0; i
< count
; ++i
)
11037 sel
[i
] = i
| (count
/ 2);
11038 vec_perm_indices
indices (sel
, 2, count
);
11039 tree perm_mask
= vect_gen_perm_mask_checked
11040 (TREE_TYPE (final_mask
), indices
);
11041 new_stmt
= gimple_build_assign (NULL_TREE
,
11046 final_mask
= make_ssa_name (TREE_TYPE (final_mask
));
11047 gimple_set_lhs (new_stmt
, final_mask
);
11048 vect_finish_stmt_generation (vinfo
, stmt_info
,
11051 else if (final_mask
)
11053 new_stmt
= gimple_build_assign (NULL_TREE
,
11054 VEC_UNPACK_HI_EXPR
,
11056 final_mask
= make_ssa_name
11057 (truth_type_for (gs_info
.offset_vectype
));
11058 gimple_set_lhs (new_stmt
, final_mask
);
11059 vect_finish_stmt_generation (vinfo
, stmt_info
,
11063 new_stmt
= vect_build_one_gather_load_call
11064 (vinfo
, stmt_info
, gsi
, &gs_info
,
11066 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
11068 tree high
= make_ssa_name (vectype
);
11069 gimple_set_lhs (new_stmt
, high
);
11070 vect_finish_stmt_generation (vinfo
, stmt_info
,
11073 /* compose low + high. */
11074 int count
= nunits
.to_constant ();
11075 vec_perm_builder
sel (count
, count
, 1);
11076 sel
.quick_grow (count
);
11077 for (int i
= 0; i
< count
; ++i
)
11078 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
11079 vec_perm_indices
indices (sel
, 2, count
);
11081 = vect_gen_perm_mask_checked (vectype
, indices
);
11082 new_stmt
= gimple_build_assign (NULL_TREE
,
11084 low
, high
, perm_mask
);
11085 data_ref
= NULL_TREE
;
11087 else if (known_eq (nunits
* 2, offset_nunits
))
11089 /* We have an offset vector with double the number of
11090 lanes. Select the low/high part accordingly. */
11091 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
11092 if ((vec_num
* j
+ i
) & 1)
11094 int count
= offset_nunits
.to_constant ();
11095 vec_perm_builder
sel (count
, count
, 1);
11096 sel
.quick_grow (count
);
11097 for (int i
= 0; i
< count
; ++i
)
11098 sel
[i
] = i
| (count
/ 2);
11099 vec_perm_indices
indices (sel
, 2, count
);
11100 tree perm_mask
= vect_gen_perm_mask_checked
11101 (TREE_TYPE (vec_offset
), indices
);
11102 new_stmt
= gimple_build_assign (NULL_TREE
,
11107 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
11108 gimple_set_lhs (new_stmt
, vec_offset
);
11109 vect_finish_stmt_generation (vinfo
, stmt_info
,
11112 new_stmt
= vect_build_one_gather_load_call
11113 (vinfo
, stmt_info
, gsi
, &gs_info
,
11114 dataref_ptr
, vec_offset
, final_mask
);
11115 data_ref
= NULL_TREE
;
11118 gcc_unreachable ();
11122 /* Emulated gather-scatter. */
11123 gcc_assert (!final_mask
);
11124 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
11127 /* For emulated gathers N offset vector element
11128 extracts (we assume the scalar scaling and ptr + offset add is consumed by the load).  */
11129 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
11130 vec_to_scalar
, stmt_info
,
11132 /* N scalar loads plus gathering them into a
11135 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
11136 stmt_info
, 0, vect_body
);
11138 = record_stmt_cost (cost_vec
, 1, vec_construct
,
11139 stmt_info
, 0, vect_body
);
11142 unsigned HOST_WIDE_INT const_offset_nunits
11143 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
11145 vec
<constructor_elt
, va_gc
> *ctor_elts
;
11146 vec_alloc (ctor_elts
, const_nunits
);
11147 gimple_seq stmts
= NULL
;
11148 /* We support offset vectors with more elements
11149 than the data vector for now. */
11150 unsigned HOST_WIDE_INT factor
11151 = const_offset_nunits
/ const_nunits
;
11152 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
11153 unsigned elt_offset
= (j
% factor
) * const_nunits
;
11154 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
11155 tree scale
= size_int (gs_info
.scale
);
11156 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
11157 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
11158 for (unsigned k
= 0; k
< const_nunits
; ++k
)
11160 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
11161 bitsize_int (k
+ elt_offset
));
11163 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
11164 vec_offset
, TYPE_SIZE (idx_type
), boff
);
11165 idx
= gimple_convert (&stmts
, sizetype
, idx
);
11166 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
11168 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
11169 TREE_TYPE (dataref_ptr
),
11171 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
11172 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
11173 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
11174 build_int_cst (ref_type
, 0));
11175 new_stmt
= gimple_build_assign (elt
, ref
);
11176 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
11177 gimple_seq_add_stmt (&stmts
, new_stmt
);
11178 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
11180 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
11181 new_stmt
= gimple_build_assign (
11182 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
11183 data_ref
= NULL_TREE
;
11186 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11187 /* DATA_REF is null if we've already built the statement. */
11190 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11191 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11193 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11194 gimple_set_lhs (new_stmt
, new_temp
);
11195 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11197 /* Store vector loads in the corresponding SLP_NODE. */
11199 slp_node
->push_vec_def (new_stmt
);
11202 if (!slp
&& !costing_p
)
11203 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11206 if (!slp
&& !costing_p
)
11207 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11209 if (costing_p
&& dump_enabled_p ())
11210 dump_printf_loc (MSG_NOTE
, vect_location
,
11211 "vect_model_load_cost: inside_cost = %u, "
11212 "prologue_cost = %u .\n",
11213 inside_cost
, prologue_cost
);
11217 poly_uint64 group_elt
= 0;
11218 unsigned int inside_cost
= 0, prologue_cost
= 0;
11219 /* For costing some adjacent vector loads, we'd like to cost with
11220 the total number of them once instead of cost each one by one. */
11221 unsigned int n_adjacent_loads
= 0;
11222 for (j
= 0; j
< ncopies
; j
++)
11224 /* 1. Create the vector or array pointer update chain. */
11225 if (j
== 0 && !costing_p
)
11227 bool simd_lane_access_p
11228 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
11229 if (simd_lane_access_p
11230 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
11231 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
11232 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
11233 && integer_zerop (DR_INIT (first_dr_info
->dr
))
11234 && alias_sets_conflict_p (get_alias_set (aggr_type
),
11235 get_alias_set (TREE_TYPE (ref_type
)))
11236 && (alignment_support_scheme
== dr_aligned
11237 || alignment_support_scheme
== dr_unaligned_supported
))
11239 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
11240 dataref_offset
= build_int_cst (ref_type
, 0);
11242 else if (diff_first_stmt_info
)
11245 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
11246 aggr_type
, at_loop
, offset
, &dummy
,
11247 gsi
, &ptr_incr
, simd_lane_access_p
,
11249 /* Adjust the pointer by the difference to first_stmt. */
11250 data_reference_p ptrdr
11251 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
11253 = fold_convert (sizetype
,
11254 size_binop (MINUS_EXPR
,
11255 DR_INIT (first_dr_info
->dr
),
11257 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11259 if (alignment_support_scheme
== dr_explicit_realign
)
11261 msq
= vect_setup_realignment (vinfo
,
11262 first_stmt_info_for_drptr
, gsi
,
11263 &realignment_token
,
11264 alignment_support_scheme
,
11265 dataref_ptr
, &at_loop
);
11266 gcc_assert (!compute_in_loop
);
11271 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
11273 offset
, &dummy
, gsi
, &ptr_incr
,
11274 simd_lane_access_p
, bump
);
11276 else if (!costing_p
)
11278 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
11279 if (dataref_offset
)
11280 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
11283 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11287 if (grouped_load
|| slp_perm
)
11288 dr_chain
.create (vec_num
);
11290 gimple
*new_stmt
= NULL
;
11291 for (i
= 0; i
< vec_num
; i
++)
11293 tree final_mask
= NULL_TREE
;
11294 tree final_len
= NULL_TREE
;
11295 tree bias
= NULL_TREE
;
11299 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11301 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11302 vec_num
* ncopies
, vectype
,
11305 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11306 final_mask
, vec_mask
, gsi
);
11309 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11310 gsi
, stmt_info
, bump
);
11313 /* 2. Create the vector-load in the loop. */
11314 switch (alignment_support_scheme
)
11317 case dr_unaligned_supported
:
11322 unsigned int misalign
;
11323 unsigned HOST_WIDE_INT align
;
11324 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
11325 if (alignment_support_scheme
== dr_aligned
)
11327 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
11330 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
11334 misalign
= misalignment
;
11335 if (dataref_offset
== NULL_TREE
11336 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
11337 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
11339 align
= least_bit_hwi (misalign
| align
);
11341 /* Compute IFN when LOOP_LENS or final_mask valid. */
11342 machine_mode vmode
= TYPE_MODE (vectype
);
11343 machine_mode new_vmode
= vmode
;
11344 internal_fn partial_ifn
= IFN_LAST
;
11347 opt_machine_mode new_ovmode
11348 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
11349 new_vmode
= new_ovmode
.require ();
11351 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
11352 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11353 vec_num
* ncopies
, vectype
,
11354 vec_num
* j
+ i
, factor
);
11356 else if (final_mask
)
11358 if (!can_vec_mask_load_store_p (
11359 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
11361 gcc_unreachable ();
11364 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11368 /* Pass VF value to 'len' argument of
11369 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11370 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11374 /* Pass all ones value to 'mask' argument of
11375 MASK_LEN_LOAD if final_mask is invalid. */
11376 mask_vectype
= truth_type_for (vectype
);
11377 final_mask
= build_minus_one_cst (mask_vectype
);
11382 signed char biasval
11383 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11385 bias
= build_int_cst (intQI_type_node
, biasval
);
11390 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11392 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11393 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
11395 final_mask
, final_len
,
11398 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
11401 gimple_call_set_nothrow (call
, true);
11403 data_ref
= NULL_TREE
;
11405 /* Need conversion if it's wrapped with VnQI. */
11406 if (vmode
!= new_vmode
)
11408 tree new_vtype
= build_vector_type_for_mode (
11409 unsigned_intQI_type_node
, new_vmode
);
11411 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11412 gimple_set_lhs (call
, var
);
11413 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11415 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11416 new_stmt
= gimple_build_assign (vec_dest
,
11417 VIEW_CONVERT_EXPR
, op
);
11420 else if (final_mask
)
11422 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11423 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11426 gimple_call_set_nothrow (call
, true);
11428 data_ref
= NULL_TREE
;
11432 tree ltype
= vectype
;
11433 tree new_vtype
= NULL_TREE
;
11434 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11435 unsigned int vect_align
11436 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11437 unsigned int scalar_dr_size
11438 = vect_get_scalar_dr_size (first_dr_info
);
11439 /* If there's no peeling for gaps but we have a gap
11440 with slp loads then load the lower half of the
11441 vector only. See get_group_load_store_type for
11442 when we apply this optimization. */
11445 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && gap
!= 0
11446 && known_eq (nunits
, (group_size
- gap
) * 2)
11447 && known_eq (nunits
, group_size
)
11448 && gap
>= (vect_align
/ scalar_dr_size
))
11452 = vector_vector_composition_type (vectype
, 2,
11454 if (new_vtype
!= NULL_TREE
)
11455 ltype
= half_vtype
;
11458 = (dataref_offset
? dataref_offset
11459 : build_int_cst (ref_type
, 0));
11460 if (ltype
!= vectype
11461 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11463 unsigned HOST_WIDE_INT gap_offset
11464 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
11465 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
11466 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11469 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11470 if (alignment_support_scheme
== dr_aligned
)
11473 TREE_TYPE (data_ref
)
11474 = build_aligned_type (TREE_TYPE (data_ref
),
11475 align
* BITS_PER_UNIT
);
11476 if (ltype
!= vectype
)
11478 vect_copy_ref_info (data_ref
,
11479 DR_REF (first_dr_info
->dr
));
11480 tree tem
= make_ssa_name (ltype
);
11481 new_stmt
= gimple_build_assign (tem
, data_ref
);
11482 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11485 vec
<constructor_elt
, va_gc
> *v
;
11487 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11489 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11490 build_zero_cst (ltype
));
11491 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11495 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11496 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11497 build_zero_cst (ltype
));
11499 gcc_assert (new_vtype
!= NULL_TREE
);
11500 if (new_vtype
== vectype
)
11501 new_stmt
= gimple_build_assign (
11502 vec_dest
, build_constructor (vectype
, v
));
11505 tree new_vname
= make_ssa_name (new_vtype
);
11506 new_stmt
= gimple_build_assign (
11507 new_vname
, build_constructor (new_vtype
, v
));
11508 vect_finish_stmt_generation (vinfo
, stmt_info
,
11510 new_stmt
= gimple_build_assign (
11512 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
11518 case dr_explicit_realign
:
11524 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11526 if (compute_in_loop
)
11527 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
11528 &realignment_token
,
11529 dr_explicit_realign
,
11530 dataref_ptr
, NULL
);
11532 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11533 ptr
= copy_ssa_name (dataref_ptr
);
11535 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11536 // For explicit realign the target alignment should be
11537 // known at compile time.
11538 unsigned HOST_WIDE_INT align
11539 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11540 new_stmt
= gimple_build_assign (
11541 ptr
, BIT_AND_EXPR
, dataref_ptr
,
11542 build_int_cst (TREE_TYPE (dataref_ptr
),
11543 -(HOST_WIDE_INT
) align
));
11544 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11546 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11547 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11548 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11549 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11550 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11551 gimple_assign_set_lhs (new_stmt
, new_temp
);
11552 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
11553 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11556 bump
= size_binop (MULT_EXPR
, vs
, TYPE_SIZE_UNIT (elem_type
));
11557 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
11558 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
, stmt_info
,
11560 new_stmt
= gimple_build_assign (
11561 NULL_TREE
, BIT_AND_EXPR
, ptr
,
11562 build_int_cst (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
11563 if (TREE_CODE (ptr
) == SSA_NAME
)
11564 ptr
= copy_ssa_name (ptr
, new_stmt
);
11566 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
11567 gimple_assign_set_lhs (new_stmt
, ptr
);
11568 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11570 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11573 case dr_explicit_realign_optimized
:
11577 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11578 new_temp
= copy_ssa_name (dataref_ptr
);
11580 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11581 // We should only be doing this if we know the target
11582 // alignment at compile time.
11583 unsigned HOST_WIDE_INT align
11584 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11585 new_stmt
= gimple_build_assign (
11586 new_temp
, BIT_AND_EXPR
, dataref_ptr
,
11587 build_int_cst (TREE_TYPE (dataref_ptr
),
11588 -(HOST_WIDE_INT
) align
));
11589 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11590 data_ref
= build2 (MEM_REF
, vectype
, new_temp
,
11591 build_int_cst (ref_type
, 0));
11595 gcc_unreachable ();
11598 /* One common place to cost the above vect load for different
11599 alignment support schemes. */
11602 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11603 only need to take care of the first stmt, whose
11604 stmt_info is first_stmt_info, vec_num iterating on it
11605 will cover the cost for the remaining, it's consistent
11606 with transforming. For the prologue cost for realign,
11607 we only need to count it once for the whole group. */
11608 bool first_stmt_info_p
= first_stmt_info
== stmt_info
;
11609 bool add_realign_cost
= first_stmt_info_p
&& i
== 0;
11610 if (memory_access_type
== VMAT_CONTIGUOUS
11611 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11612 || (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
11613 && (!grouped_load
|| first_stmt_info_p
)))
11615 /* Leave realign cases alone to keep them simple. */
11616 if (alignment_support_scheme
== dr_explicit_realign_optimized
11617 || alignment_support_scheme
== dr_explicit_realign
)
11618 vect_get_load_cost (vinfo
, stmt_info
, 1,
11619 alignment_support_scheme
, misalignment
,
11620 add_realign_cost
, &inside_cost
,
11621 &prologue_cost
, cost_vec
, cost_vec
,
11624 n_adjacent_loads
++;
11629 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11630 /* DATA_REF is null if we've already built the statement. */
11633 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11634 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11636 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11637 gimple_set_lhs (new_stmt
, new_temp
);
11638 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11641 /* 3. Handle explicit realignment if necessary/supported.
11643 vec_dest = realign_load (msq, lsq, realignment_token) */
11645 && (alignment_support_scheme
== dr_explicit_realign_optimized
11646 || alignment_support_scheme
== dr_explicit_realign
))
11648 lsq
= gimple_assign_lhs (new_stmt
);
11649 if (!realignment_token
)
11650 realignment_token
= dataref_ptr
;
11651 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11652 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
, msq
,
11653 lsq
, realignment_token
);
11654 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11655 gimple_assign_set_lhs (new_stmt
, new_temp
);
11656 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11658 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
11661 if (i
== vec_num
- 1 && j
== ncopies
- 1)
11662 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
11668 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11671 inside_cost
= record_stmt_cost (cost_vec
, 1, vec_perm
,
11672 stmt_info
, 0, vect_body
);
11675 tree perm_mask
= perm_mask_for_reverse (vectype
);
11676 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
11677 perm_mask
, stmt_info
, gsi
);
11678 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
11682 /* Collect vector loads and later create their permutation in
11683 vect_transform_grouped_load (). */
11684 if (!costing_p
&& (grouped_load
|| slp_perm
))
11685 dr_chain
.quick_push (new_temp
);
11687 /* Store vector loads in the corresponding SLP_NODE. */
11688 if (!costing_p
&& slp
&& !slp_perm
)
11689 slp_node
->push_vec_def (new_stmt
);
11691 /* With SLP permutation we load the gaps as well, without
11692 we need to skip the gaps after we manage to fully load
11693 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11694 group_elt
+= nunits
;
11696 && maybe_ne (group_gap_adj
, 0U)
11698 && known_eq (group_elt
, group_size
- group_gap_adj
))
11700 poly_wide_int bump_val
11701 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11702 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
)
11704 bump_val
= -bump_val
;
11705 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11706 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11711 /* Bump the vector pointer to account for a gap or for excess
11712 elements loaded for a permuted SLP load. */
11714 && maybe_ne (group_gap_adj
, 0U)
11717 poly_wide_int bump_val
11718 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
)) * group_gap_adj
);
11719 if (tree_int_cst_sgn (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
11720 bump_val
= -bump_val
;
11721 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
11722 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11726 if (slp
&& !slp_perm
)
11732 /* For SLP we know we've seen all possible uses of dr_chain so
11733 direct vect_transform_slp_perm_load to DCE the unused parts.
11734 ??? This is a hack to prevent compile-time issues as seen
11735 in PR101120 and friends. */
11738 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, nullptr, vf
,
11739 true, &n_perms
, nullptr);
11740 inside_cost
= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
11741 stmt_info
, 0, vect_body
);
11745 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
11746 gsi
, vf
, false, &n_perms
,
11755 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11756 /* We assume that the cost of a single load-lanes instruction
11757 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11758 If a grouped access is instead being provided by a
11759 load-and-permute operation, include the cost of the
11761 if (costing_p
&& first_stmt_info
== stmt_info
)
11763 /* Uses an even and odd extract operations or shuffle
11764 operations for each needed permute. */
11765 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11766 int nstmts
= ceil_log2 (group_size
) * group_size
;
11767 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11768 stmt_info
, 0, vect_body
);
11770 if (dump_enabled_p ())
11771 dump_printf_loc (MSG_NOTE
, vect_location
,
11772 "vect_model_load_cost:"
11773 "strided group_size = %d .\n",
11776 else if (!costing_p
)
11778 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11780 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11783 else if (!costing_p
)
11784 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11786 dr_chain
.release ();
11788 if (!slp
&& !costing_p
)
11789 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11793 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11794 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11795 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11796 if (n_adjacent_loads
> 0)
11797 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
11798 alignment_support_scheme
, misalignment
, false,
11799 &inside_cost
, &prologue_cost
, cost_vec
, cost_vec
,
11801 if (dump_enabled_p ())
11802 dump_printf_loc (MSG_NOTE
, vect_location
,
11803 "vect_model_load_cost: inside_cost = %u, "
11804 "prologue_cost = %u .\n",
11805 inside_cost
, prologue_cost
);
11811 /* Function vect_is_simple_cond.
11814 LOOP - the loop that is being vectorized.
11815 COND - Condition that is checked for simple use.
11818 *COMP_VECTYPE - the vector type for the comparison.
11819 *DTS - The def types for the arguments of the comparison
11821 Returns whether a COND can be vectorized. Checks whether
11822 condition operands are supportable using vect_is_simple_use.  */
11825 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
11826 slp_tree slp_node
, tree
*comp_vectype
,
11827 enum vect_def_type
*dts
, tree vectype
)
11830 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11834 if (TREE_CODE (cond
) == SSA_NAME
11835 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
11837 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
11838 &slp_op
, &dts
[0], comp_vectype
)
11840 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
11845 if (!COMPARISON_CLASS_P (cond
))
11848 lhs
= TREE_OPERAND (cond
, 0);
11849 rhs
= TREE_OPERAND (cond
, 1);
11851 if (TREE_CODE (lhs
) == SSA_NAME
)
11853 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
11854 &lhs
, &slp_op
, &dts
[0], &vectype1
))
11857 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
11858 || TREE_CODE (lhs
) == FIXED_CST
)
11859 dts
[0] = vect_constant_def
;
11863 if (TREE_CODE (rhs
) == SSA_NAME
)
11865 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
11866 &rhs
, &slp_op
, &dts
[1], &vectype2
))
11869 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
11870 || TREE_CODE (rhs
) == FIXED_CST
)
11871 dts
[1] = vect_constant_def
;
11875 if (vectype1
&& vectype2
11876 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
11877 TYPE_VECTOR_SUBPARTS (vectype2
)))
11880 *comp_vectype
= vectype1
? vectype1
: vectype2
;
11881 /* Invariant comparison. */
11882 if (! *comp_vectype
)
11884 tree scalar_type
= TREE_TYPE (lhs
);
11885 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11886 *comp_vectype
= truth_type_for (vectype
);
11889 /* If we can widen the comparison to match vectype do so. */
11890 if (INTEGRAL_TYPE_P (scalar_type
)
11892 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
11893 TYPE_SIZE (TREE_TYPE (vectype
))))
11894 scalar_type
= build_nonstandard_integer_type
11895 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
11896 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
11904 /* vectorizable_condition.
11906 Check if STMT_INFO is conditional modify expression that can be vectorized.
11907 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11908 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11911 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11913 Return true if STMT_INFO is vectorizable in this way. */
11916 vectorizable_condition (vec_info
*vinfo
,
11917 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11919 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
11921 tree scalar_dest
= NULL_TREE
;
11922 tree vec_dest
= NULL_TREE
;
11923 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
11924 tree then_clause
, else_clause
;
11925 tree comp_vectype
= NULL_TREE
;
11926 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
11927 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
11930 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
11931 enum vect_def_type dts
[4]
11932 = {vect_unknown_def_type
, vect_unknown_def_type
,
11933 vect_unknown_def_type
, vect_unknown_def_type
};
11937 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
11939 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11940 vec
<tree
> vec_oprnds0
= vNULL
;
11941 vec
<tree
> vec_oprnds1
= vNULL
;
11942 vec
<tree
> vec_oprnds2
= vNULL
;
11943 vec
<tree
> vec_oprnds3
= vNULL
;
11945 bool masked
= false;
11947 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
11950 /* Is vectorizable conditional operation? */
11951 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
11955 code
= gimple_assign_rhs_code (stmt
);
11956 if (code
!= COND_EXPR
)
11959 stmt_vec_info reduc_info
= NULL
;
11960 int reduc_index
= -1;
11961 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
11963 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
11968 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
11969 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
11970 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
11971 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
11972 || reduc_index
!= -1);
11976 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
11980 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11981 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11986 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
11990 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
11994 gcc_assert (ncopies
>= 1);
11995 if (for_reduction
&& ncopies
> 1)
11996 return false; /* FORNOW */
11998 cond_expr
= gimple_assign_rhs1 (stmt
);
12000 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
12001 &comp_vectype
, &dts
[0], vectype
)
12005 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
12006 slp_tree then_slp_node
, else_slp_node
;
12007 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
12008 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
12010 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
12011 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
12014 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
12017 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
12020 masked
= !COMPARISON_CLASS_P (cond_expr
);
12021 vec_cmp_type
= truth_type_for (comp_vectype
);
12023 if (vec_cmp_type
== NULL_TREE
)
12026 cond_code
= TREE_CODE (cond_expr
);
12029 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
12030 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
12033 /* For conditional reductions, the "then" value needs to be the candidate
12034 value calculated by this iteration while the "else" value needs to be
12035 the result carried over from previous iterations. If the COND_EXPR
12036 is the other way around, we need to swap it. */
12037 bool must_invert_cmp_result
= false;
12038 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
12041 must_invert_cmp_result
= true;
12044 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
12045 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
12046 if (new_code
== ERROR_MARK
)
12047 must_invert_cmp_result
= true;
12050 cond_code
= new_code
;
12051 /* Make sure we don't accidentally use the old condition. */
12052 cond_expr
= NULL_TREE
;
12055 std::swap (then_clause
, else_clause
);
12058 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
12060 /* Boolean values may have another representation in vectors
12061 and therefore we prefer bit operations over comparison for
12062 them (which also works for scalar masks). We store opcodes
12063 to use in bitop1 and bitop2. Statement is vectorized as
12064 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12065 depending on bitop1 and bitop2 arity. */
12069 bitop1
= BIT_NOT_EXPR
;
12070 bitop2
= BIT_AND_EXPR
;
12073 bitop1
= BIT_NOT_EXPR
;
12074 bitop2
= BIT_IOR_EXPR
;
12077 bitop1
= BIT_NOT_EXPR
;
12078 bitop2
= BIT_AND_EXPR
;
12079 std::swap (cond_expr0
, cond_expr1
);
12082 bitop1
= BIT_NOT_EXPR
;
12083 bitop2
= BIT_IOR_EXPR
;
12084 std::swap (cond_expr0
, cond_expr1
);
12087 bitop1
= BIT_XOR_EXPR
;
12090 bitop1
= BIT_XOR_EXPR
;
12091 bitop2
= BIT_NOT_EXPR
;
12096 cond_code
= SSA_NAME
;
12099 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
12100 && reduction_type
== EXTRACT_LAST_REDUCTION
12101 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
12103 if (dump_enabled_p ())
12104 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12105 "reduction comparison operation not supported.\n");
12111 if (bitop1
!= NOP_EXPR
)
12113 machine_mode mode
= TYPE_MODE (comp_vectype
);
12116 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
12117 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12120 if (bitop2
!= NOP_EXPR
)
12122 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
12124 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12129 vect_cost_for_stmt kind
= vector_stmt
;
12130 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12131 /* Count one reduction-like operation per vector. */
12132 kind
= vec_to_scalar
;
12133 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
12135 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
12137 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
12142 && (!vect_maybe_update_slp_op_vectype
12143 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
12145 && !vect_maybe_update_slp_op_vectype
12146 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
12147 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
12148 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
12150 if (dump_enabled_p ())
12151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12152 "incompatible vector types for invariants\n");
12156 if (loop_vinfo
&& for_reduction
12157 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12159 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12161 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12162 vectype
, OPTIMIZE_FOR_SPEED
))
12163 vect_record_loop_len (loop_vinfo
,
12164 &LOOP_VINFO_LENS (loop_vinfo
),
12165 ncopies
* vec_num
, vectype
, 1);
12167 vect_record_loop_mask (loop_vinfo
,
12168 &LOOP_VINFO_MASKS (loop_vinfo
),
12169 ncopies
* vec_num
, vectype
, NULL
);
12171 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12172 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
12174 if (dump_enabled_p ())
12175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12176 "conditional reduction prevents the use"
12177 " of partial vectors.\n");
12178 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
12182 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
12183 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
12191 scalar_dest
= gimple_assign_lhs (stmt
);
12192 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12193 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
12195 bool swap_cond_operands
= false;
12197 /* See whether another part of the vectorized code applies a loop
12198 mask to the condition, or to its inverse. */
12200 vec_loop_masks
*masks
= NULL
;
12201 vec_loop_lens
*lens
= NULL
;
12202 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
12204 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12205 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12207 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
12209 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12210 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12213 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
12214 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12215 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12218 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
12219 tree_code orig_code
= cond
.code
;
12220 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
12221 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12223 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12224 cond_code
= cond
.code
;
12225 swap_cond_operands
= true;
12229 /* Try the inverse of the current mask. We check if the
12230 inverse mask is live and if so we generate a negate of
12231 the current mask such that we still honor NaNs. */
12232 cond
.inverted_p
= true;
12233 cond
.code
= orig_code
;
12234 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12236 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12237 cond_code
= cond
.code
;
12238 swap_cond_operands
= true;
12239 must_invert_cmp_result
= true;
12246 /* Handle cond expr. */
12248 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12249 cond_expr
, &vec_oprnds0
, comp_vectype
,
12250 then_clause
, &vec_oprnds2
, vectype
,
12251 reduction_type
!= EXTRACT_LAST_REDUCTION
12252 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
12254 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12255 cond_expr0
, &vec_oprnds0
, comp_vectype
,
12256 cond_expr1
, &vec_oprnds1
, comp_vectype
,
12257 then_clause
, &vec_oprnds2
, vectype
,
12258 reduction_type
!= EXTRACT_LAST_REDUCTION
12259 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
12261 /* Arguments are ready. Create the new vector stmt. */
12262 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
12264 vec_then_clause
= vec_oprnds2
[i
];
12265 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12266 vec_else_clause
= vec_oprnds3
[i
];
12268 if (swap_cond_operands
)
12269 std::swap (vec_then_clause
, vec_else_clause
);
12272 vec_compare
= vec_cond_lhs
;
12275 vec_cond_rhs
= vec_oprnds1
[i
];
12276 if (bitop1
== NOP_EXPR
)
12278 gimple_seq stmts
= NULL
;
12279 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
12280 vec_cond_lhs
, vec_cond_rhs
);
12281 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
12285 new_temp
= make_ssa_name (vec_cmp_type
);
12287 if (bitop1
== BIT_NOT_EXPR
)
12288 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
12292 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
12294 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12295 if (bitop2
== NOP_EXPR
)
12296 vec_compare
= new_temp
;
12297 else if (bitop2
== BIT_NOT_EXPR
12298 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
12300 /* Instead of doing ~x ? y : z do x ? z : y. */
12301 vec_compare
= new_temp
;
12302 std::swap (vec_then_clause
, vec_else_clause
);
12306 vec_compare
= make_ssa_name (vec_cmp_type
);
12307 if (bitop2
== BIT_NOT_EXPR
)
12309 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
12312 = gimple_build_assign (vec_compare
, bitop2
,
12313 vec_cond_lhs
, new_temp
);
12314 vect_finish_stmt_generation (vinfo
, stmt_info
,
12320 /* If we decided to apply a loop mask to the result of the vector
12321 comparison, AND the comparison with the mask now. Later passes
12322 should then be able to reuse the AND results between mulitple
12326 for (int i = 0; i < 100; ++i)
12327 x[i] = y[i] ? z[i] : 10;
12329 results in following optimized GIMPLE:
12331 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12332 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12333 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12334 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12335 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12336 vect_iftmp.11_47, { 10, ... }>;
12338 instead of using a masked and unmasked forms of
12339 vec != { 0, ... } (masked in the MASK_LOAD,
12340 unmasked in the VEC_COND_EXPR). */
12342 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12343 in cases where that's necessary. */
12345 tree len
= NULL_TREE
, bias
= NULL_TREE
;
12346 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
12348 if (!is_gimple_val (vec_compare
))
12350 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12351 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12353 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12354 vec_compare
= vec_compare_name
;
12357 if (must_invert_cmp_result
)
12359 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12360 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12363 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12364 vec_compare
= vec_compare_name
;
12367 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12368 vectype
, OPTIMIZE_FOR_SPEED
))
12372 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
12373 vec_num
* ncopies
, vectype
, i
, 1);
12374 signed char biasval
12375 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
12376 bias
= build_int_cst (intQI_type_node
, biasval
);
12380 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
12381 bias
= build_int_cst (intQI_type_node
, 0);
12387 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
12389 tree tmp2
= make_ssa_name (vec_cmp_type
);
12391 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
12393 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
12394 vec_compare
= tmp2
;
12399 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12401 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
12402 tree lhs
= gimple_get_lhs (old_stmt
);
12404 new_stmt
= gimple_build_call_internal
12405 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, else_clause
, vec_compare
,
12406 vec_then_clause
, len
, bias
);
12408 new_stmt
= gimple_build_call_internal
12409 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
12411 gimple_call_set_lhs (new_stmt
, lhs
);
12412 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
12413 if (old_stmt
== gsi_stmt (*gsi
))
12414 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
12417 /* In this case we're moving the definition to later in the
12418 block. That doesn't matter because the only uses of the
12419 lhs are in phi statements. */
12420 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12421 gsi_remove (&old_gsi
, true);
12422 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12427 new_temp
= make_ssa_name (vec_dest
);
12428 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12429 vec_then_clause
, vec_else_clause
);
12430 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12433 slp_node
->push_vec_def (new_stmt
);
12435 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12439 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12441 vec_oprnds0
.release ();
12442 vec_oprnds1
.release ();
12443 vec_oprnds2
.release ();
12444 vec_oprnds3
.release ();
12449 /* Helper of vectorizable_comparison.
12451 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12452 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12453 comparison, put it in VEC_STMT, and insert it at GSI.
12455 Return true if STMT_INFO is vectorizable in this way. */
12458 vectorizable_comparison_1 (vec_info
*vinfo
, tree vectype
,
12459 stmt_vec_info stmt_info
, tree_code code
,
12460 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12461 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12463 tree lhs
, rhs1
, rhs2
;
12464 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12465 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12467 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12468 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12470 poly_uint64 nunits
;
12472 enum tree_code bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12474 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12475 vec
<tree
> vec_oprnds0
= vNULL
;
12476 vec
<tree
> vec_oprnds1
= vNULL
;
12480 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12483 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12486 mask_type
= vectype
;
12487 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12492 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12494 gcc_assert (ncopies
>= 1);
12496 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12499 slp_tree slp_rhs1
, slp_rhs2
;
12500 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12501 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12504 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12505 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12508 if (vectype1
&& vectype2
12509 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12510 TYPE_VECTOR_SUBPARTS (vectype2
)))
12513 vectype
= vectype1
? vectype1
: vectype2
;
12515 /* Invariant comparison. */
12518 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12519 vectype
= mask_type
;
12521 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12523 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12526 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12529 /* Can't compare mask and non-mask types. */
12530 if (vectype1
&& vectype2
12531 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
12534 /* Boolean values may have another representation in vectors
12535 and therefore we prefer bit operations over comparison for
12536 them (which also works for scalar masks). We store opcodes
12537 to use in bitop1 and bitop2. Statement is vectorized as
12538 BITOP2 (rhs1 BITOP1 rhs2) or
12539 rhs1 BITOP2 (BITOP1 rhs2)
12540 depending on bitop1 and bitop2 arity. */
12541 bool swap_p
= false;
12542 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12544 if (code
== GT_EXPR
)
12546 bitop1
= BIT_NOT_EXPR
;
12547 bitop2
= BIT_AND_EXPR
;
12549 else if (code
== GE_EXPR
)
12551 bitop1
= BIT_NOT_EXPR
;
12552 bitop2
= BIT_IOR_EXPR
;
12554 else if (code
== LT_EXPR
)
12556 bitop1
= BIT_NOT_EXPR
;
12557 bitop2
= BIT_AND_EXPR
;
12560 else if (code
== LE_EXPR
)
12562 bitop1
= BIT_NOT_EXPR
;
12563 bitop2
= BIT_IOR_EXPR
;
12568 bitop1
= BIT_XOR_EXPR
;
12569 if (code
== EQ_EXPR
)
12570 bitop2
= BIT_NOT_EXPR
;
12576 if (bitop1
== NOP_EXPR
)
12578 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12583 machine_mode mode
= TYPE_MODE (vectype
);
12586 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12587 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12590 if (bitop2
!= NOP_EXPR
)
12592 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12593 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12598 /* Put types on constant and invariant SLP children. */
12600 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12601 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12603 if (dump_enabled_p ())
12604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12605 "incompatible vector types for invariants\n");
12609 vect_model_simple_cost (vinfo
, stmt_info
,
12610 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12611 dts
, ndts
, slp_node
, cost_vec
);
12618 lhs
= gimple_assign_lhs (STMT_VINFO_STMT (stmt_info
));
12619 mask
= vect_create_destination_var (lhs
, mask_type
);
12621 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12622 rhs1
, &vec_oprnds0
, vectype
,
12623 rhs2
, &vec_oprnds1
, vectype
);
12625 std::swap (vec_oprnds0
, vec_oprnds1
);
12627 /* Arguments are ready. Create the new vector stmt. */
12628 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12631 vec_rhs2
= vec_oprnds1
[i
];
12633 new_temp
= make_ssa_name (mask
);
12634 if (bitop1
== NOP_EXPR
)
12636 new_stmt
= gimple_build_assign (new_temp
, code
,
12637 vec_rhs1
, vec_rhs2
);
12638 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12642 if (bitop1
== BIT_NOT_EXPR
)
12643 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12645 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12647 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12648 if (bitop2
!= NOP_EXPR
)
12650 tree res
= make_ssa_name (mask
);
12651 if (bitop2
== BIT_NOT_EXPR
)
12652 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12654 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12656 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12660 slp_node
->push_vec_def (new_stmt
);
12662 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12666 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12668 vec_oprnds0
.release ();
12669 vec_oprnds1
.release ();
12674 /* vectorizable_comparison.
12676 Check if STMT_INFO is comparison expression that can be vectorized.
12677 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12678 comparison, put it in VEC_STMT, and insert it at GSI.
12680 Return true if STMT_INFO is vectorizable in this way. */
12683 vectorizable_comparison (vec_info
*vinfo
,
12684 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12686 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12688 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12690 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12693 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12696 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12700 enum tree_code code
= gimple_assign_rhs_code (stmt
);
12701 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12702 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12703 vec_stmt
, slp_node
, cost_vec
))
12707 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
12712 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12713 can handle all live statements in the node. Otherwise return true
12714 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12715 VEC_STMT_P is as for vectorizable_live_operation. */
12718 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12719 slp_tree slp_node
, slp_instance slp_node_instance
,
12721 stmt_vector_for_cost
*cost_vec
)
12725 stmt_vec_info slp_stmt_info
;
12727 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
12729 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
12730 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
12731 slp_node_instance
, i
,
12732 vec_stmt_p
, cost_vec
))
12736 else if (STMT_VINFO_LIVE_P (stmt_info
)
12737 && !vectorizable_live_operation (vinfo
, stmt_info
,
12738 slp_node
, slp_node_instance
, -1,
12739 vec_stmt_p
, cost_vec
))
12745 /* Make sure the statement is vectorizable. */
12748 vect_analyze_stmt (vec_info
*vinfo
,
12749 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
12750 slp_tree node
, slp_instance node_instance
,
12751 stmt_vector_for_cost
*cost_vec
)
12753 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12754 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
12756 gimple_seq pattern_def_seq
;
12758 if (dump_enabled_p ())
12759 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
12762 if (gimple_has_volatile_ops (stmt_info
->stmt
))
12763 return opt_result::failure_at (stmt_info
->stmt
,
12765 " stmt has volatile operands: %G\n",
12768 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12770 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
12772 gimple_stmt_iterator si
;
12774 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
12776 stmt_vec_info pattern_def_stmt_info
12777 = vinfo
->lookup_stmt (gsi_stmt (si
));
12778 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
12779 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
12781 /* Analyze def stmt of STMT if it's a pattern stmt. */
12782 if (dump_enabled_p ())
12783 dump_printf_loc (MSG_NOTE
, vect_location
,
12784 "==> examining pattern def statement: %G",
12785 pattern_def_stmt_info
->stmt
);
12788 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
12789 need_to_vectorize
, node
, node_instance
,
12797 /* Skip stmts that do not need to be vectorized. In loops this is expected
12799 - the COND_EXPR which is the loop exit condition
12800 - any LABEL_EXPRs in the loop
12801 - computations that are used only for array indexing or loop control.
12802 In basic blocks we only analyze statements that are a part of some SLP
12803 instance, therefore, all the statements are relevant.
12805 Pattern statement needs to be analyzed instead of the original statement
12806 if the original statement is not relevant. Otherwise, we analyze both
12807 statements. In basic blocks we are called from some SLP instance
12808 traversal, don't analyze pattern stmts instead, the pattern stmts
12809 already will be part of SLP instance. */
12811 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
12812 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
12813 && !STMT_VINFO_LIVE_P (stmt_info
))
12815 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12816 && pattern_stmt_info
12817 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12818 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12820 /* Analyze PATTERN_STMT instead of the original stmt. */
12821 stmt_info
= pattern_stmt_info
;
12822 if (dump_enabled_p ())
12823 dump_printf_loc (MSG_NOTE
, vect_location
,
12824 "==> examining pattern statement: %G",
12829 if (dump_enabled_p ())
12830 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
12832 return opt_result::success ();
12835 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
12837 && pattern_stmt_info
12838 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
12839 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
12841 /* Analyze PATTERN_STMT too. */
12842 if (dump_enabled_p ())
12843 dump_printf_loc (MSG_NOTE
, vect_location
,
12844 "==> examining pattern statement: %G",
12845 pattern_stmt_info
->stmt
);
12848 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
12849 node_instance
, cost_vec
);
12854 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
12856 case vect_internal_def
:
12859 case vect_reduction_def
:
12860 case vect_nested_cycle
:
12861 gcc_assert (!bb_vinfo
12862 && (relevance
== vect_used_in_outer
12863 || relevance
== vect_used_in_outer_by_reduction
12864 || relevance
== vect_used_by_reduction
12865 || relevance
== vect_unused_in_scope
12866 || relevance
== vect_used_only_live
));
12869 case vect_induction_def
:
12870 case vect_first_order_recurrence
:
12871 gcc_assert (!bb_vinfo
);
12874 case vect_constant_def
:
12875 case vect_external_def
:
12876 case vect_unknown_def_type
:
12878 gcc_unreachable ();
12881 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12883 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
12885 if (STMT_VINFO_RELEVANT_P (stmt_info
))
12887 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
12888 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
12889 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
12890 *need_to_vectorize
= true;
12893 if (PURE_SLP_STMT (stmt_info
) && !node
)
12895 if (dump_enabled_p ())
12896 dump_printf_loc (MSG_NOTE
, vect_location
,
12897 "handled only by SLP analysis\n");
12898 return opt_result::success ();
12903 && (STMT_VINFO_RELEVANT_P (stmt_info
)
12904 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
12905 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12906 -mveclibabi= takes preference over library functions with
12907 the simd attribute. */
12908 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12909 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
12911 || vectorizable_conversion (vinfo
, stmt_info
,
12912 NULL
, NULL
, node
, cost_vec
)
12913 || vectorizable_operation (vinfo
, stmt_info
,
12914 NULL
, NULL
, node
, cost_vec
)
12915 || vectorizable_assignment (vinfo
, stmt_info
,
12916 NULL
, NULL
, node
, cost_vec
)
12917 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12918 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12919 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12920 node
, node_instance
, cost_vec
)
12921 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
12922 NULL
, node
, cost_vec
)
12923 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12924 || vectorizable_condition (vinfo
, stmt_info
,
12925 NULL
, NULL
, node
, cost_vec
)
12926 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12928 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
12929 stmt_info
, NULL
, node
)
12930 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
12931 stmt_info
, NULL
, node
, cost_vec
));
12935 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
12936 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
12937 NULL
, NULL
, node
, cost_vec
)
12938 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
12940 || vectorizable_shift (vinfo
, stmt_info
,
12941 NULL
, NULL
, node
, cost_vec
)
12942 || vectorizable_operation (vinfo
, stmt_info
,
12943 NULL
, NULL
, node
, cost_vec
)
12944 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
12946 || vectorizable_load (vinfo
, stmt_info
,
12947 NULL
, NULL
, node
, cost_vec
)
12948 || vectorizable_store (vinfo
, stmt_info
,
12949 NULL
, NULL
, node
, cost_vec
)
12950 || vectorizable_condition (vinfo
, stmt_info
,
12951 NULL
, NULL
, node
, cost_vec
)
12952 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
12954 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
12958 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
12961 return opt_result::failure_at (stmt_info
->stmt
,
12963 " relevant stmt not supported: %G",
12966 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12967 need extra handling, except for vectorizable reductions. */
12969 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
12970 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
12971 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
12972 stmt_info
, node
, node_instance
,
12974 return opt_result::failure_at (stmt_info
->stmt
,
12976 " live stmt not supported: %G",
12979 return opt_result::success ();
12983 /* Function vect_transform_stmt.
12985 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12988 vect_transform_stmt (vec_info
*vinfo
,
12989 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12990 slp_tree slp_node
, slp_instance slp_node_instance
)
12992 bool is_store
= false;
12993 gimple
*vec_stmt
= NULL
;
12996 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
12998 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13000 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
13002 switch (STMT_VINFO_TYPE (stmt_info
))
13004 case type_demotion_vec_info_type
:
13005 case type_promotion_vec_info_type
:
13006 case type_conversion_vec_info_type
:
13007 done
= vectorizable_conversion (vinfo
, stmt_info
,
13008 gsi
, &vec_stmt
, slp_node
, NULL
);
13012 case induc_vec_info_type
:
13013 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
13014 stmt_info
, &vec_stmt
, slp_node
,
13019 case shift_vec_info_type
:
13020 done
= vectorizable_shift (vinfo
, stmt_info
,
13021 gsi
, &vec_stmt
, slp_node
, NULL
);
13025 case op_vec_info_type
:
13026 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13031 case assignment_vec_info_type
:
13032 done
= vectorizable_assignment (vinfo
, stmt_info
,
13033 gsi
, &vec_stmt
, slp_node
, NULL
);
13037 case load_vec_info_type
:
13038 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13043 case store_vec_info_type
:
13044 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
13046 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
13047 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
13048 /* In case of interleaving, the whole chain is vectorized when the
13049 last store in the chain is reached. Store stmts before the last
13050 one are skipped, and there vec_stmt_info shouldn't be freed
13055 done
= vectorizable_store (vinfo
, stmt_info
,
13056 gsi
, &vec_stmt
, slp_node
, NULL
);
13062 case condition_vec_info_type
:
13063 done
= vectorizable_condition (vinfo
, stmt_info
,
13064 gsi
, &vec_stmt
, slp_node
, NULL
);
13068 case comparison_vec_info_type
:
13069 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13074 case call_vec_info_type
:
13075 done
= vectorizable_call (vinfo
, stmt_info
,
13076 gsi
, &vec_stmt
, slp_node
, NULL
);
13079 case call_simd_clone_vec_info_type
:
13080 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13084 case reduc_vec_info_type
:
13085 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13086 gsi
, &vec_stmt
, slp_node
);
13090 case cycle_phi_info_type
:
13091 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13092 &vec_stmt
, slp_node
, slp_node_instance
);
13096 case lc_phi_info_type
:
13097 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13098 stmt_info
, &vec_stmt
, slp_node
);
13102 case recurr_info_type
:
13103 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13104 stmt_info
, &vec_stmt
, slp_node
, NULL
);
13108 case phi_info_type
:
13109 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
13114 if (!STMT_VINFO_LIVE_P (stmt_info
))
13116 if (dump_enabled_p ())
13117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13118 "stmt not supported.\n");
13119 gcc_unreachable ();
13124 if (!slp_node
&& vec_stmt
)
13125 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
13127 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
13129 /* Handle stmts whose DEF is used outside the loop-nest that is
13130 being vectorized. */
13131 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
13132 slp_node_instance
, true, NULL
);
13137 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13143 /* Remove a group of stores (for SLP or interleaving), free their
13147 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
13149 stmt_vec_info next_stmt_info
= first_stmt_info
;
13151 while (next_stmt_info
)
13153 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
13154 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
13155 /* Free the attached stmt_vec_info and remove the stmt. */
13156 vinfo
->remove_stmt (next_stmt_info
);
13157 next_stmt_info
= tmp
;
13161 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13162 elements of type SCALAR_TYPE, or null if the target doesn't support
13165 If NUNITS is zero, return a vector type that contains elements of
13166 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13168 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13169 for this vectorization region and want to "autodetect" the best choice.
13170 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13171 and we want the new type to be interoperable with it. PREVAILING_MODE
13172 in this case can be a scalar integer mode or a vector mode; when it
13173 is a vector mode, the function acts like a tree-level version of
13174 related_vector_mode. */
13177 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
13178 tree scalar_type
, poly_uint64 nunits
)
13180 tree orig_scalar_type
= scalar_type
;
13181 scalar_mode inner_mode
;
13182 machine_mode simd_mode
;
13185 if ((!INTEGRAL_TYPE_P (scalar_type
)
13186 && !POINTER_TYPE_P (scalar_type
)
13187 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
13188 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
13189 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
13192 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
13194 /* Interoperability between modes requires one to be a constant multiple
13195 of the other, so that the number of vectors required for each operation
13196 is a compile-time constant. */
13197 if (prevailing_mode
!= VOIDmode
13198 && !constant_multiple_p (nunits
* nbytes
,
13199 GET_MODE_SIZE (prevailing_mode
))
13200 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
13204 /* For vector types of elements whose mode precision doesn't
13205 match their types precision we use a element type of mode
13206 precision. The vectorization routines will have to make sure
13207 they support the proper result truncation/extension.
13208 We also make sure to build vector types with INTEGER_TYPE
13209 component type only. */
13210 if (INTEGRAL_TYPE_P (scalar_type
)
13211 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
13212 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
13213 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
13214 TYPE_UNSIGNED (scalar_type
));
13216 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13217 When the component mode passes the above test simply use a type
13218 corresponding to that mode. The theory is that any use that
13219 would cause problems with this will disable vectorization anyway. */
13220 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
13221 && !INTEGRAL_TYPE_P (scalar_type
))
13222 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
13224 /* We can't build a vector type of elements with alignment bigger than
13226 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
13227 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
13228 TYPE_UNSIGNED (scalar_type
));
13230 /* If we felt back to using the mode fail if there was
13231 no scalar type for it. */
13232 if (scalar_type
== NULL_TREE
)
13235 /* If no prevailing mode was supplied, use the mode the target prefers.
13236 Otherwise lookup a vector mode based on the prevailing mode. */
13237 if (prevailing_mode
== VOIDmode
)
13239 gcc_assert (known_eq (nunits
, 0U));
13240 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
13241 if (SCALAR_INT_MODE_P (simd_mode
))
13243 /* Traditional behavior is not to take the integer mode
13244 literally, but simply to use it as a way of determining
13245 the vector size. It is up to mode_for_vector to decide
13246 what the TYPE_MODE should be.
13248 Note that nunits == 1 is allowed in order to support single
13249 element vector types. */
13250 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
13251 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13255 else if (SCALAR_INT_MODE_P (prevailing_mode
)
13256 || !related_vector_mode (prevailing_mode
,
13257 inner_mode
, nunits
).exists (&simd_mode
))
13259 /* Fall back to using mode_for_vector, mostly in the hope of being
13260 able to use an integer mode. */
13261 if (known_eq (nunits
, 0U)
13262 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
13265 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13269 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
13271 /* In cases where the mode was chosen by mode_for_vector, check that
13272 the target actually supports the chosen mode, or that it at least
13273 allows the vector mode to be replaced by a like-sized integer. */
13274 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
13275 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
13278 /* Re-attach the address-space qualifier if we canonicalized the scalar
13280 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
13281 return build_qualified_type
13282 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
13287 /* Function get_vectype_for_scalar_type.
13289 Returns the vector type corresponding to SCALAR_TYPE as supported
13290 by the target. If GROUP_SIZE is nonzero and we're performing BB
13291 vectorization, make sure that the number of elements in the vector
13292 is no bigger than GROUP_SIZE. */
13295 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13296 unsigned int group_size
)
13298 /* For BB vectorization, we should always have a group size once we've
13299 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13300 are tentative requests during things like early data reference
13301 analysis and pattern recognition. */
13302 if (is_a
<bb_vec_info
> (vinfo
))
13303 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
13307 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13309 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
13310 vinfo
->vector_mode
= TYPE_MODE (vectype
);
13312 /* Register the natural choice of vector type, before the group size
13313 has been applied. */
13315 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
13317 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13318 try again with an explicit number of elements. */
13321 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
13323 /* Start with the biggest number of units that fits within
13324 GROUP_SIZE and halve it until we find a valid vector type.
13325 Usually either the first attempt will succeed or all will
13326 fail (in the latter case because GROUP_SIZE is too small
13327 for the target), but it's possible that a target could have
13328 a hole between supported vector types.
13330 If GROUP_SIZE is not a power of 2, this has the effect of
13331 trying the largest power of 2 that fits within the group,
13332 even though the group is not a multiple of that vector size.
13333 The BB vectorizer will then try to carve up the group into
13335 unsigned int nunits
= 1 << floor_log2 (group_size
);
13338 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
13339 scalar_type
, nunits
);
13342 while (nunits
> 1 && !vectype
);
13348 /* Return the vector type corresponding to SCALAR_TYPE as supported
13349 by the target. NODE, if nonnull, is the SLP tree node that will
13350 use the returned vector type. */
13353 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
13355 unsigned int group_size
= 0;
13357 group_size
= SLP_TREE_LANES (node
);
13358 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13361 /* Function get_mask_type_for_scalar_type.
13363 Returns the mask type corresponding to a result of comparison
13364 of vectors of specified SCALAR_TYPE as supported by target.
13365 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13366 make sure that the number of elements in the vector is no bigger
13367 than GROUP_SIZE. */
13370 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13371 unsigned int group_size
)
13373 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
13378 return truth_type_for (vectype
);
13381 /* Function get_mask_type_for_scalar_type.
13383 Returns the mask type corresponding to a result of comparison
13384 of vectors of specified SCALAR_TYPE as supported by target.
13385 NODE, if nonnull, is the SLP tree node that will use the returned
13389 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
13392 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, node
);
13397 return truth_type_for (vectype
);
13400 /* Function get_same_sized_vectype
13402 Returns a vector type corresponding to SCALAR_TYPE of size
13403 VECTOR_TYPE if supported by the target. */
13406 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
13408 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
13409 return truth_type_for (vector_type
);
13411 poly_uint64 nunits
;
13412 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
13413 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
13416 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
13417 scalar_type
, nunits
);
13420 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13421 would not change the chosen vector modes. */
13424 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
13426 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
13427 i
!= vinfo
->used_vector_modes
.end (); ++i
)
13428 if (!VECTOR_MODE_P (*i
)
13429 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
13434 /* Function vect_is_simple_use.
13437 VINFO - the vect info of the loop or basic block that is being vectorized.
13438 OPERAND - operand in the loop or bb.
13440 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13441 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13442 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13443 the definition could be anywhere in the function
13444 DT - the type of definition
13446 Returns whether a stmt with OPERAND can be vectorized.
13447 For loops, supportable operands are constants, loop invariants, and operands
13448 that are defined by the current iteration of the loop. Unsupportable
13449 operands are those that are defined by a previous iteration of the loop (as
13450 is the case in reduction/induction computations).
13451 For basic blocks, supportable operands are constants and bb invariants.
13452 For now, operands defined outside the basic block are not supported. */
13455 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13456 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
13458 if (def_stmt_info_out
)
13459 *def_stmt_info_out
= NULL
;
13461 *def_stmt_out
= NULL
;
13462 *dt
= vect_unknown_def_type
;
13464 if (dump_enabled_p ())
13466 dump_printf_loc (MSG_NOTE
, vect_location
,
13467 "vect_is_simple_use: operand ");
13468 if (TREE_CODE (operand
) == SSA_NAME
13469 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
13470 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
13472 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
13475 if (CONSTANT_CLASS_P (operand
))
13476 *dt
= vect_constant_def
;
13477 else if (is_gimple_min_invariant (operand
))
13478 *dt
= vect_external_def
;
13479 else if (TREE_CODE (operand
) != SSA_NAME
)
13480 *dt
= vect_unknown_def_type
;
13481 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
13482 *dt
= vect_external_def
;
13485 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
13486 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
13488 *dt
= vect_external_def
;
13491 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
13492 def_stmt
= stmt_vinfo
->stmt
;
13493 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
13494 if (def_stmt_info_out
)
13495 *def_stmt_info_out
= stmt_vinfo
;
13498 *def_stmt_out
= def_stmt
;
13501 if (dump_enabled_p ())
13503 dump_printf (MSG_NOTE
, ", type of def: ");
13506 case vect_uninitialized_def
:
13507 dump_printf (MSG_NOTE
, "uninitialized\n");
13509 case vect_constant_def
:
13510 dump_printf (MSG_NOTE
, "constant\n");
13512 case vect_external_def
:
13513 dump_printf (MSG_NOTE
, "external\n");
13515 case vect_internal_def
:
13516 dump_printf (MSG_NOTE
, "internal\n");
13518 case vect_induction_def
:
13519 dump_printf (MSG_NOTE
, "induction\n");
13521 case vect_reduction_def
:
13522 dump_printf (MSG_NOTE
, "reduction\n");
13524 case vect_double_reduction_def
:
13525 dump_printf (MSG_NOTE
, "double reduction\n");
13527 case vect_nested_cycle
:
13528 dump_printf (MSG_NOTE
, "nested cycle\n");
13530 case vect_first_order_recurrence
:
13531 dump_printf (MSG_NOTE
, "first order recurrence\n");
13533 case vect_unknown_def_type
:
13534 dump_printf (MSG_NOTE
, "unknown\n");
13539 if (*dt
== vect_unknown_def_type
)
13541 if (dump_enabled_p ())
13542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13543 "Unsupported pattern.\n");
13550 /* Function vect_is_simple_use.
13552 Same as vect_is_simple_use but also determines the vector operand
13553 type of OPERAND and stores it to *VECTYPE. If the definition of
13554 OPERAND is vect_uninitialized_def, vect_constant_def or
13555 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13556 is responsible to compute the best suited vector type for the
13560 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
13561 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
13562 gimple
**def_stmt_out
)
13564 stmt_vec_info def_stmt_info
;
13566 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
13570 *def_stmt_out
= def_stmt
;
13571 if (def_stmt_info_out
)
13572 *def_stmt_info_out
= def_stmt_info
;
13574 /* Now get a vector type if the def is internal, otherwise supply
13575 NULL_TREE and leave it up to the caller to figure out a proper
13576 type for the use stmt. */
13577 if (*dt
== vect_internal_def
13578 || *dt
== vect_induction_def
13579 || *dt
== vect_reduction_def
13580 || *dt
== vect_double_reduction_def
13581 || *dt
== vect_nested_cycle
13582 || *dt
== vect_first_order_recurrence
)
13584 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
13585 gcc_assert (*vectype
!= NULL_TREE
);
13586 if (dump_enabled_p ())
13587 dump_printf_loc (MSG_NOTE
, vect_location
,
13588 "vect_is_simple_use: vectype %T\n", *vectype
);
13590 else if (*dt
== vect_uninitialized_def
13591 || *dt
== vect_constant_def
13592 || *dt
== vect_external_def
)
13593 *vectype
= NULL_TREE
;
13595 gcc_unreachable ();
13600 /* Function vect_is_simple_use.
13602 Same as vect_is_simple_use but determines the operand by operand
13603 position OPERAND from either STMT or SLP_NODE, filling in *OP
13604 and *SLP_DEF (when SLP_NODE is not NULL). */
13607 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
13608 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
13609 enum vect_def_type
*dt
,
13610 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
13614 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
13616 *vectype
= SLP_TREE_VECTYPE (child
);
13617 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
13619 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
13620 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
13624 if (def_stmt_info_out
)
13625 *def_stmt_info_out
= NULL
;
13626 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
13627 *dt
= SLP_TREE_DEF_TYPE (child
);
13634 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
13636 if (gimple_assign_rhs_code (ass
) == COND_EXPR
13637 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
13640 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
13642 *op
= gimple_op (ass
, operand
);
13644 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
13645 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
13647 *op
= gimple_op (ass
, operand
+ 1);
13649 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
13650 *op
= gimple_call_arg (call
, operand
);
13652 gcc_unreachable ();
13653 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
13657 /* If OP is not NULL and is external or constant update its vector
13658 type with VECTYPE. Returns true if successful or false if not,
13659 for example when conflicting vector types are present. */
13662 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
13664 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
13666 if (SLP_TREE_VECTYPE (op
))
13667 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
13668 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13669 should be handled by patters. Allow vect_constant_def for now. */
13670 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
13671 && SLP_TREE_DEF_TYPE (op
) == vect_external_def
)
13673 SLP_TREE_VECTYPE (op
) = vectype
;
13677 /* Function supportable_widening_operation
13679 Check whether an operation represented by the code CODE is a
13680 widening operation that is supported by the target platform in
13681 vector form (i.e., when operating on arguments of type VECTYPE_IN
13682 producing a result of type VECTYPE_OUT).
13684 Widening operations we currently support are NOP (CONVERT), FLOAT,
13685 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13686 are supported by the target platform either directly (via vector
13687 tree-codes), or via target builtins.
13690 - CODE1 and CODE2 are codes of vector operations to be used when
13691 vectorizing the operation, if available.
13692 - MULTI_STEP_CVT determines the number of required intermediate steps in
13693 case of multi-step conversion (like char->short->int - in that case
13694 MULTI_STEP_CVT will be 1).
13695 - INTERM_TYPES contains the intermediate type required to perform the
13696 widening operation (short in the above example). */
13699 supportable_widening_operation (vec_info
*vinfo
,
13701 stmt_vec_info stmt_info
,
13702 tree vectype_out
, tree vectype_in
,
13703 code_helper
*code1
,
13704 code_helper
*code2
,
13705 int *multi_step_cvt
,
13706 vec
<tree
> *interm_types
)
13708 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
13709 class loop
*vect_loop
= NULL
;
13710 machine_mode vec_mode
;
13711 enum insn_code icode1
, icode2
;
13712 optab optab1
= unknown_optab
, optab2
= unknown_optab
;
13713 tree vectype
= vectype_in
;
13714 tree wide_vectype
= vectype_out
;
13715 tree_code c1
= MAX_TREE_CODES
, c2
= MAX_TREE_CODES
;
13717 tree prev_type
, intermediate_type
;
13718 machine_mode intermediate_mode
, prev_mode
;
13719 optab optab3
, optab4
;
13721 *multi_step_cvt
= 0;
13723 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
13725 switch (code
.safe_as_tree_code ())
13727 case MAX_TREE_CODES
:
13728 /* Don't set c1 and c2 if code is not a tree_code. */
13731 case WIDEN_MULT_EXPR
:
13732 /* The result of a vectorized widening operation usually requires
13733 two vectors (because the widened results do not fit into one vector).
13734 The generated vector results would normally be expected to be
13735 generated in the same order as in the original scalar computation,
13736 i.e. if 8 results are generated in each vector iteration, they are
13737 to be organized as follows:
13738 vect1: [res1,res2,res3,res4],
13739 vect2: [res5,res6,res7,res8].
13741 However, in the special case that the result of the widening
13742 operation is used in a reduction computation only, the order doesn't
13743 matter (because when vectorizing a reduction we change the order of
13744 the computation). Some targets can take advantage of this and
13745 generate more efficient code. For example, targets like Altivec,
13746 that support widen_mult using a sequence of {mult_even,mult_odd}
13747 generate the following vectors:
13748 vect1: [res1,res3,res5,res7],
13749 vect2: [res2,res4,res6,res8].
13751 When vectorizing outer-loops, we execute the inner-loop sequentially
13752 (each vectorized inner-loop iteration contributes to VF outer-loop
13753 iterations in parallel). We therefore don't allow to change the
13754 order of the computation in the inner-loop during outer-loop
13756 /* TODO: Another case in which order doesn't *really* matter is when we
13757 widen and then contract again, e.g. (short)((int)x * y >> 8).
13758 Normally, pack_trunc performs an even/odd permute, whereas the
13759 repack from an even/odd expansion would be an interleave, which
13760 would be significantly simpler for e.g. AVX2. */
13761 /* In any case, in order to avoid duplicating the code below, recurse
13762 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13763 are properly set up for the caller. If we fail, we'll continue with
13764 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13766 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
13767 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
13768 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
13769 stmt_info
, vectype_out
,
13771 code2
, multi_step_cvt
,
13774 /* Elements in a vector with vect_used_by_reduction property cannot
13775 be reordered if the use chain with this property does not have the
13776 same operation. One such an example is s += a * b, where elements
13777 in a and b cannot be reordered. Here we check if the vector defined
13778 by STMT is only directly used in the reduction statement. */
13779 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
13780 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
13782 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
13785 c1
= VEC_WIDEN_MULT_LO_EXPR
;
13786 c2
= VEC_WIDEN_MULT_HI_EXPR
;
13789 case DOT_PROD_EXPR
:
13790 c1
= DOT_PROD_EXPR
;
13791 c2
= DOT_PROD_EXPR
;
13799 case VEC_WIDEN_MULT_EVEN_EXPR
:
13800 /* Support the recursion induced just above. */
13801 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
13802 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
13805 case WIDEN_LSHIFT_EXPR
:
13806 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
13807 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
13811 c1
= VEC_UNPACK_LO_EXPR
;
13812 c2
= VEC_UNPACK_HI_EXPR
;
13816 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
13817 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
13820 case FIX_TRUNC_EXPR
:
13821 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
13822 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
13826 gcc_unreachable ();
13829 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
13830 std::swap (c1
, c2
);
13832 if (code
== FIX_TRUNC_EXPR
)
13834 /* The signedness is determined from output operand. */
13835 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
13836 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
13838 else if (CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ())
13839 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
13840 && VECTOR_BOOLEAN_TYPE_P (vectype
)
13841 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
13842 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
13844 /* If the input and result modes are the same, a different optab
13845 is needed where we pass in the number of units in vectype. */
13846 optab1
= vec_unpacks_sbool_lo_optab
;
13847 optab2
= vec_unpacks_sbool_hi_optab
;
13850 vec_mode
= TYPE_MODE (vectype
);
13851 if (widening_fn_p (code
))
13853 /* If this is an internal fn then we must check whether the target
13854 supports either a low-high split or an even-odd split. */
13855 internal_fn ifn
= as_internal_fn ((combined_fn
) code
);
13857 internal_fn lo
, hi
, even
, odd
;
13858 lookup_hilo_internal_fn (ifn
, &lo
, &hi
);
13859 *code1
= as_combined_fn (lo
);
13860 *code2
= as_combined_fn (hi
);
13861 optab1
= direct_internal_fn_optab (lo
, {vectype
, vectype
});
13862 optab2
= direct_internal_fn_optab (hi
, {vectype
, vectype
});
13864 /* If we don't support low-high, then check for even-odd. */
13866 || (icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
13868 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
13870 lookup_evenodd_internal_fn (ifn
, &even
, &odd
);
13871 *code1
= as_combined_fn (even
);
13872 *code2
= as_combined_fn (odd
);
13873 optab1
= direct_internal_fn_optab (even
, {vectype
, vectype
});
13874 optab2
= direct_internal_fn_optab (odd
, {vectype
, vectype
});
13877 else if (code
.is_tree_code ())
13879 if (code
== FIX_TRUNC_EXPR
)
13881 /* The signedness is determined from output operand. */
13882 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
13883 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
13885 else if (CONVERT_EXPR_CODE_P ((tree_code
) code
.safe_as_tree_code ())
13886 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
13887 && VECTOR_BOOLEAN_TYPE_P (vectype
)
13888 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
13889 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
13891 /* If the input and result modes are the same, a different optab
13892 is needed where we pass in the number of units in vectype. */
13893 optab1
= vec_unpacks_sbool_lo_optab
;
13894 optab2
= vec_unpacks_sbool_hi_optab
;
13898 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
13899 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
13905 if (!optab1
|| !optab2
)
13908 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
13909 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
13913 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
13914 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
13916 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
13918 /* For scalar masks we may have different boolean
13919 vector types having the same QImode. Thus we
13920 add additional check for elements number. */
13921 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
13922 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
13926 /* Check if it's a multi-step conversion that can be done using intermediate
13929 prev_type
= vectype
;
13930 prev_mode
= vec_mode
;
13932 if (!CONVERT_EXPR_CODE_P (code
.safe_as_tree_code ()))
13935 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13936 intermediate steps in promotion sequence. We try
13937 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
13939 interm_types
->create (MAX_INTERM_CVT_STEPS
);
13940 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
13942 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
13943 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
13945 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
13946 else if (VECTOR_MODE_P (intermediate_mode
))
13948 tree intermediate_element_type
13949 = lang_hooks
.types
.type_for_mode (GET_MODE_INNER (intermediate_mode
),
13950 TYPE_UNSIGNED (prev_type
));
13952 = build_vector_type_for_mode (intermediate_element_type
,
13953 intermediate_mode
);
13957 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
13958 TYPE_UNSIGNED (prev_type
));
13960 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
13961 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
13962 && intermediate_mode
== prev_mode
13963 && SCALAR_INT_MODE_P (prev_mode
))
13965 /* If the input and result modes are the same, a different optab
13966 is needed where we pass in the number of units in vectype. */
13967 optab3
= vec_unpacks_sbool_lo_optab
;
13968 optab4
= vec_unpacks_sbool_hi_optab
;
13972 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
13973 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
13976 if (!optab3
|| !optab4
13977 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
13978 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
13979 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
13980 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
13981 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
13982 == CODE_FOR_nothing
)
13983 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
13984 == CODE_FOR_nothing
))
13987 interm_types
->quick_push (intermediate_type
);
13988 (*multi_step_cvt
)++;
13990 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
13991 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
13993 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
13995 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
13996 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
14000 prev_type
= intermediate_type
;
14001 prev_mode
= intermediate_mode
;
14004 interm_types
->release ();
14009 /* Function supportable_narrowing_operation
14011 Check whether an operation represented by the code CODE is a
14012 narrowing operation that is supported by the target platform in
14013 vector form (i.e., when operating on arguments of type VECTYPE_IN
14014 and producing a result of type VECTYPE_OUT).
14016 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14017 and FLOAT. This function checks if these operations are supported by
14018 the target platform directly via vector tree-codes.
14021 - CODE1 is the code of a vector operation to be used when
14022 vectorizing the operation, if available.
14023 - MULTI_STEP_CVT determines the number of required intermediate steps in
14024 case of multi-step conversion (like int->short->char - in that case
14025 MULTI_STEP_CVT will be 1).
14026 - INTERM_TYPES contains the intermediate type required to perform the
14027 narrowing operation (short in the above example). */
14030 supportable_narrowing_operation (code_helper code
,
14031 tree vectype_out
, tree vectype_in
,
14032 code_helper
*code1
, int *multi_step_cvt
,
14033 vec
<tree
> *interm_types
)
14035 machine_mode vec_mode
;
14036 enum insn_code icode1
;
14037 optab optab1
, interm_optab
;
14038 tree vectype
= vectype_in
;
14039 tree narrow_vectype
= vectype_out
;
14041 tree intermediate_type
, prev_type
;
14042 machine_mode intermediate_mode
, prev_mode
;
14044 unsigned HOST_WIDE_INT n_elts
;
14047 if (!code
.is_tree_code ())
14050 *multi_step_cvt
= 0;
14051 switch ((tree_code
) code
)
14054 c1
= VEC_PACK_TRUNC_EXPR
;
14055 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
14056 && VECTOR_BOOLEAN_TYPE_P (vectype
)
14057 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
))
14058 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&n_elts
)
14059 && n_elts
< BITS_PER_UNIT
)
14060 optab1
= vec_pack_sbool_trunc_optab
;
14062 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14065 case FIX_TRUNC_EXPR
:
14066 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
14067 /* The signedness is determined from output operand. */
14068 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
14072 c1
= VEC_PACK_FLOAT_EXPR
;
14073 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
14077 gcc_unreachable ();
14083 vec_mode
= TYPE_MODE (vectype
);
14084 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
14089 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14091 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14093 /* For scalar masks we may have different boolean
14094 vector types having the same QImode. Thus we
14095 add additional check for elements number. */
14096 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
14097 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14101 if (code
== FLOAT_EXPR
)
14104 /* Check if it's a multi-step conversion that can be done using intermediate
14106 prev_mode
= vec_mode
;
14107 prev_type
= vectype
;
14108 if (code
== FIX_TRUNC_EXPR
)
14109 uns
= TYPE_UNSIGNED (vectype_out
);
14111 uns
= TYPE_UNSIGNED (vectype
);
14113 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14114 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14115 costly than signed. */
14116 if (code
== FIX_TRUNC_EXPR
&& uns
)
14118 enum insn_code icode2
;
14121 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
14123 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
14124 if (interm_optab
!= unknown_optab
14125 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
14126 && insn_data
[icode1
].operand
[0].mode
14127 == insn_data
[icode2
].operand
[0].mode
)
14130 optab1
= interm_optab
;
14135 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14136 intermediate steps in promotion sequence. We try
14137 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14138 interm_types
->create (MAX_INTERM_CVT_STEPS
);
14139 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
14141 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
14142 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
14144 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
14147 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
14148 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
14149 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
14150 && SCALAR_INT_MODE_P (prev_mode
)
14151 && TYPE_VECTOR_SUBPARTS (intermediate_type
).is_constant (&n_elts
)
14152 && n_elts
< BITS_PER_UNIT
)
14153 interm_optab
= vec_pack_sbool_trunc_optab
;
14156 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
14159 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
14160 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
14161 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
14162 == CODE_FOR_nothing
))
14165 interm_types
->quick_push (intermediate_type
);
14166 (*multi_step_cvt
)++;
14168 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
14170 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14172 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
14173 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
14177 prev_mode
= intermediate_mode
;
14178 prev_type
= intermediate_type
;
14179 optab1
= interm_optab
;
14182 interm_types
->release ();
14186 /* Generate and return a vector mask of MASK_TYPE such that
14187 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14188 Add the statements to SEQ. */
14191 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
14192 tree end_index
, const char *name
)
14194 tree cmp_type
= TREE_TYPE (start_index
);
14195 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
14196 cmp_type
, mask_type
,
14197 OPTIMIZE_FOR_SPEED
));
14198 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
14199 start_index
, end_index
,
14200 build_zero_cst (mask_type
));
14203 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
14205 tmp
= make_ssa_name (mask_type
);
14206 gimple_call_set_lhs (call
, tmp
);
14207 gimple_seq_add_stmt (seq
, call
);
14211 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14212 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14215 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
14218 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
14219 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
14222 /* Try to compute the vector types required to vectorize STMT_INFO,
14223 returning true on success and false if vectorization isn't possible.
14224 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14225 take sure that the number of elements in the vectors is no bigger
14230 - Set *STMT_VECTYPE_OUT to:
14231 - NULL_TREE if the statement doesn't need to be vectorized;
14232 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14234 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14235 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14236 statement does not help to determine the overall number of units. */
14239 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
14240 tree
*stmt_vectype_out
,
14241 tree
*nunits_vectype_out
,
14242 unsigned int group_size
)
14244 gimple
*stmt
= stmt_info
->stmt
;
14246 /* For BB vectorization, we should always have a group size once we've
14247 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14248 are tentative requests during things like early data reference
14249 analysis and pattern recognition. */
14250 if (is_a
<bb_vec_info
> (vinfo
))
14251 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
14255 *stmt_vectype_out
= NULL_TREE
;
14256 *nunits_vectype_out
= NULL_TREE
;
14258 if (gimple_get_lhs (stmt
) == NULL_TREE
14259 /* MASK_STORE has no lhs, but is ok. */
14260 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14262 if (is_a
<gcall
*> (stmt
))
14264 /* Ignore calls with no lhs. These must be calls to
14265 #pragma omp simd functions, and what vectorization factor
14266 it really needs can't be determined until
14267 vectorizable_simd_clone_call. */
14268 if (dump_enabled_p ())
14269 dump_printf_loc (MSG_NOTE
, vect_location
,
14270 "defer to SIMD clone analysis.\n");
14271 return opt_result::success ();
14274 return opt_result::failure_at (stmt
,
14275 "not vectorized: irregular stmt.%G", stmt
);
14279 tree scalar_type
= NULL_TREE
;
14280 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
14282 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
14283 if (dump_enabled_p ())
14284 dump_printf_loc (MSG_NOTE
, vect_location
,
14285 "precomputed vectype: %T\n", vectype
);
14287 else if (vect_use_mask_type_p (stmt_info
))
14289 unsigned int precision
= stmt_info
->mask_precision
;
14290 scalar_type
= build_nonstandard_integer_type (precision
, 1);
14291 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
14293 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
14294 " data-type %T\n", scalar_type
);
14295 if (dump_enabled_p ())
14296 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
14300 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
14301 scalar_type
= TREE_TYPE (DR_REF (dr
));
14302 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
14303 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
14305 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
14307 if (dump_enabled_p ())
14310 dump_printf_loc (MSG_NOTE
, vect_location
,
14311 "get vectype for scalar type (group size %d):"
14312 " %T\n", group_size
, scalar_type
);
14314 dump_printf_loc (MSG_NOTE
, vect_location
,
14315 "get vectype for scalar type: %T\n", scalar_type
);
14317 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
14319 return opt_result::failure_at (stmt
,
14321 " unsupported data-type %T\n",
14324 if (dump_enabled_p ())
14325 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
14328 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
14329 return opt_result::failure_at (stmt
,
14330 "not vectorized: vector stmt in loop:%G",
14333 *stmt_vectype_out
= vectype
;
14335 /* Don't try to compute scalar types if the stmt produces a boolean
14336 vector; use the existing vector type instead. */
14337 tree nunits_vectype
= vectype
;
14338 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
14340 /* The number of units is set according to the smallest scalar
14341 type (or the largest vector size, but we only support one
14342 vector size per vectorization). */
14343 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
14344 TREE_TYPE (vectype
));
14345 if (scalar_type
!= TREE_TYPE (vectype
))
14347 if (dump_enabled_p ())
14348 dump_printf_loc (MSG_NOTE
, vect_location
,
14349 "get vectype for smallest scalar type: %T\n",
14351 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
14353 if (!nunits_vectype
)
14354 return opt_result::failure_at
14355 (stmt
, "not vectorized: unsupported data-type %T\n",
14357 if (dump_enabled_p ())
14358 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
14363 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
14364 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
14365 return opt_result::failure_at (stmt
,
14366 "Not vectorized: Incompatible number "
14367 "of vector subparts between %T and %T\n",
14368 nunits_vectype
, *stmt_vectype_out
);
14370 if (dump_enabled_p ())
14372 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
14373 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
14374 dump_printf (MSG_NOTE
, "\n");
14377 *nunits_vectype_out
= nunits_vectype
;
14378 return opt_result::success ();
14381 /* Generate and return statement sequence that sets vector length LEN that is:
14383 min_of_start_and_end = min (START_INDEX, END_INDEX);
14384 left_len = END_INDEX - min_of_start_and_end;
14385 rhs = min (left_len, LEN_LIMIT);
14388 Note: the cost of the code generated by this function is modeled
14389 by vect_estimate_min_profitable_iters, so changes here may need
14390 corresponding changes there. */
14393 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
14395 gimple_seq stmts
= NULL
;
14396 tree len_type
= TREE_TYPE (len
);
14397 gcc_assert (TREE_TYPE (start_index
) == len_type
);
14399 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
14400 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
14401 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
14402 gimple
* stmt
= gimple_build_assign (len
, rhs
);
14403 gimple_seq_add_stmt (&stmts
, stmt
);