/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"	/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
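/* Example (illustrative only, not from a particular caller): a routine
   costing one broadcast of an invariant operand in the loop prologue
   would record it as

     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					stmt_info, 0, vect_prologue);

   as the cost-model routines below do.  The returned value is only a
   preliminary estimate; the recorded entries are what the target cost
   hooks later consume.  */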
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
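/* For instance (illustrative example, not taken from a particular
   testcase): if the pattern recognizer replaced a scalar statement
   S: tmp = a * b with a widening-multiply pattern statement S', then
   marking S as relevant above redirects the relevance/liveness flags
   to S', because S itself is not going to be vectorized.  */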
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
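/* Illustrative example (for exposition only): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the statement computing SUM is used by the loop-closed PHI after the
   loop exit, so it is "live"; a store such as b[i] = x has a vdef and
   is therefore "relevant".  */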
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- STMT_INFO = LOAD FROM MEMORY
     -2- STORE TO MEMORY = STMT_INFO
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
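/* Illustrative example: for the store  a[i_2] = x_3  this returns true
   when USE is x_3 (the stored value) and false when USE is i_2, whose
   only role is to index the array.  */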
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	     || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
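/* Illustrative example: vectorizing  x + 3  with a V4SI vector type
   reaches this function with VAL = 3 and TYPE = V4SI; the 3 is splat
   via build_vector_from_val into {3, 3, 3, 3} and the resulting
   init_stmt is emitted in the loop preheader when GSI is NULL.  */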
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
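/* Worked example (illustrative numbers): with group_size == 2, a
   vectorization factor of 4 and nunits == 8, group_memory_nvectors
   (2 * 4, 8) above yields 1 rgroup control, recorded either as a loop
   mask or as a loop length depending on which of the two target
   capabilities was found.  */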
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
		  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
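/* Illustrative example: with a loop mask LM and a vectorized condition
   mask VM, the code above emits  vec_mask_and_N = VM & LM  for each new
   pair; if the pair is already in vec_cond_masked_set, VM is already
   the result of masking by LM and is returned unchanged.  */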
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }
  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();
  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							 sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype)
	  || gs_info->ifn == IFN_LAST)
	continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
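/* Worked example (illustrative numbers only): for a constant DR_STEP of 4,
   SCALE = 1 and at most COUNT = 255 further scalar iterations, the offsets
   { 0, 4, 8, ... } range up to 255 * 4 = 1020, which needs 10 bits; rounding
   up to a power of two gives a 16-bit offset type, so the gather or scatter
   can use a narrow offset vector instead of the original pointer-wide
   offset.  */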
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
	      >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
			     indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
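/* For example (illustrative): for an 8-element vector the required selector
   is { 7, 6, 5, 4, 3, 2, 1, 0 }; the builder above encodes only the single
   stepped pattern { nunits - 1, nunits - 2, nunits - 3 }, which also covers
   variable-length vectors.  */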
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */
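/* For example (illustrative): for a V4SI access with a negative step the
   first vector access covers the element at the DR address and the three
   elements before it, so *POFFSET becomes (-4 + 1) * 4 = -12 bytes.  */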
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info
*vinfo
,
2034 stmt_vec_info stmt_info
, tree vectype
,
2035 vec_load_store_type vls_type
,
2036 unsigned int ncopies
, poly_int64
*poffset
)
2038 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2039 dr_alignment_support alignment_support_scheme
;
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE
;
2049 /* For backward running DRs the first access in vectype actually is
2050 N-1 elements before the address of the DR. */
2051 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2054 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2057 if (alignment_support_scheme
!= dr_aligned
2058 && alignment_support_scheme
!= dr_unaligned_supported
)
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2062 "negative step but alignment required.\n");
2064 return VMAT_ELEMENTWISE
;
2067 if (vls_type
== VLS_STORE_INVARIANT
)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE
, vect_location
,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN
;
2076 if (!perm_mask_for_reverse (vectype
))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2080 "negative step and reversing not supported.\n");
2082 return VMAT_ELEMENTWISE
;
2085 return VMAT_CONTIGUOUS_REVERSE
;
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the return vector.  It first checks
   whether the target supports a piece-sized vector mode for the
   construction; if not, it falls back to a piece-sized scalar mode.
   It returns NULL_TREE if no usable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
	  return vtype;
	}

      /* Otherwise check if exists an integer type of the same piece size and
	 if vec_init optab supports construction from it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
	      != CODE_FOR_nothing))
	{
	  *ptype = build_nonstandard_integer_type (pbsize, 1);
	  return build_vector_type (*ptype, nelts);
	}
    }

  return NULL_TREE;
}
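/* Usage sketch (hypothetical caller, for illustration only):

     tree ptype;
     if (tree vtype = vector_vector_composition_type (vectype, 2, &ptype))
       ...  // build the VTYPE result from two PTYPE-sized pieces

   e.g. splitting a V16QI into two pieces may yield PTYPE = V8QI, or, when no
   vector piece mode is supported, a 64-bit integer PTYPE with a V2DI-style
   result type.  */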
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
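/* For example (illustrative): a load group that uses only two out of every
   three consecutive elements has a gap, so the vectorized code may read
   elements no scalar statement reads; the overrun checks below decide
   whether that is acceptable.  Store groups must not have such gaps.  */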
2175 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2176 tree vectype
, slp_tree slp_node
,
2177 bool masked_p
, vec_load_store_type vls_type
,
2178 vect_memory_access_type
*memory_access_type
,
2179 poly_int64
*poffset
,
2180 dr_alignment_support
*alignment_support_scheme
,
2182 gather_scatter_info
*gs_info
)
2184 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2185 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2186 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2187 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2188 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2189 bool single_element_p
= (stmt_info
== first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2191 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2192 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p
= false;
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p
= (!masked_p
2201 && vls_type
== VLS_LOAD
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits
, group_size
))
2225 *memory_access_type
= VMAT_STRIDED_SLP
;
2227 *memory_access_type
= VMAT_ELEMENTWISE
;
2231 overrun_p
= loop_vinfo
&& gap
!= 0;
2232 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2244 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2246 / vect_get_scalar_dr_size (first_dr_info
)))
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations avoid the epilogue peeling
2251 by simply loading half of the vector only. Usually
2252 the construction with an upper zero half will be elided. */
2253 dr_alignment_support alss
;
2254 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2258 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2259 vectype
, misalign
)))
2261 || alss
== dr_unaligned_supported
)
2262 && known_eq (nunits
, (group_size
- gap
) * 2)
2263 && known_eq (nunits
, group_size
)
2264 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2268 if (overrun_p
&& !can_overrun_p
)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "Peeling for outer loop is not supported\n");
2275 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2278 if (single_element_p
)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type
= get_negative_load_store_type
2282 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits
, group_size
))
2289 *memory_access_type
= VMAT_STRIDED_SLP
;
2291 *memory_access_type
= VMAT_ELEMENTWISE
;
2296 gcc_assert (!loop_vinfo
|| cmp
> 0);
2297 *memory_access_type
= VMAT_CONTIGUOUS
;
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2306 && *memory_access_type
== VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2308 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2311 unsigned HOST_WIDE_INT cnunits
, cvf
;
2313 || !nunits
.is_constant (&cnunits
)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size
* cvf
) % cnunits
+ group_size
< cnunits
)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2324 "peeling for gaps insufficient for "
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type
= VMAT_ELEMENTWISE
;
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p
= (gap
!= 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2347 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2348 / vect_get_scalar_dr_size (first_dr_info
)))
2349 would_overrun_p
= false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2352 && (can_overrun_p
|| !would_overrun_p
)
2353 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
	  /* First cope with the degenerate case of a single-element vector.  */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type
== VLS_LOAD
2362 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2363 : vect_store_lanes_supported (vectype
, group_size
,
2366 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2367 overrun_p
= would_overrun_p
;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type
== VLS_LOAD
2372 ? vect_grouped_load_supported (vectype
, single_element_p
,
2374 : vect_grouped_store_supported (vectype
, group_size
))
2376 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2377 overrun_p
= would_overrun_p
;
  /* As a last resort, try using a gather load or scatter store.
2383 ??? Although the code can handle all group sizes correctly,
2384 it probably isn't a win to use separate strided accesses based
2385 on nearby locations. Or, even if it's a win over scalar code,
2386 it might not be a win over vectorizing at a lower VF, if that
2387 allows us to use contiguous accesses. */
2388 if (*memory_access_type
== VMAT_ELEMENTWISE
2391 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2393 *memory_access_type
= VMAT_GATHER_SCATTER
;
2396 if (*memory_access_type
== VMAT_GATHER_SCATTER
2397 || *memory_access_type
== VMAT_ELEMENTWISE
)
2399 *alignment_support_scheme
= dr_unaligned_supported
;
2400 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2404 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2410 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2415 while (next_stmt_info
)
2417 tree op
= vect_get_store_rhs (next_stmt_info
);
2418 enum vect_def_type dt
;
2419 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2423 "use not simple.\n");
2426 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2432 gcc_assert (can_overrun_p
);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2435 "Data access with gaps requires scalar "
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
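/* Rough map of the outcomes (illustrative summary of the VMAT_* values used
   below): gather/scatter accesses become VMAT_GATHER_SCATTER, strided
   accesses VMAT_ELEMENTWISE or VMAT_STRIDED_SLP, invariant loads
   VMAT_INVARIANT, negative-step accesses VMAT_CONTIGUOUS_DOWN or
   VMAT_CONTIGUOUS_REVERSE, and unit-stride accesses VMAT_CONTIGUOUS,
   possibly VMAT_CONTIGUOUS_PERMUTE or VMAT_LOAD_STORE_LANES for groups.  */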
2458 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2459 tree vectype
, slp_tree slp_node
,
2460 bool masked_p
, vec_load_store_type vls_type
,
2461 unsigned int ncopies
,
2462 vect_memory_access_type
*memory_access_type
,
2463 poly_int64
*poffset
,
2464 dr_alignment_support
*alignment_support_scheme
,
2466 gather_scatter_info
*gs_info
)
2468 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2469 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2470 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2474 *memory_access_type
= VMAT_GATHER_SCATTER
;
2475 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2477 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2478 &gs_info
->offset_dt
,
2479 &gs_info
->offset_vectype
))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2483 "%s index use not simple.\n",
2484 vls_type
== VLS_LOAD
? "gather" : "scatter");
2487 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2489 if (vls_type
!= VLS_LOAD
)
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2493 "unsupported emulated scatter.\n");
2496 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2497 || !TYPE_VECTOR_SUBPARTS
2498 (gs_info
->offset_vectype
).is_constant ()
2499 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2500 (gs_info
->offset_vectype
),
2501 TYPE_VECTOR_SUBPARTS (vectype
)))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2505 "unsupported vector types for emulated "
2510 /* Gather-scatter accesses perform only component accesses, alignment
2511 is irrelevant for them. */
2512 *alignment_support_scheme
= dr_unaligned_supported
;
2514 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2516 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2518 vls_type
, memory_access_type
, poffset
,
2519 alignment_support_scheme
,
2520 misalignment
, gs_info
))
2523 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2525 gcc_assert (!slp_node
);
2527 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2529 *memory_access_type
= VMAT_GATHER_SCATTER
;
2531 *memory_access_type
= VMAT_ELEMENTWISE
;
2532 /* Alignment is irrelevant here. */
2533 *alignment_support_scheme
= dr_unaligned_supported
;
2537 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2540 gcc_assert (vls_type
== VLS_LOAD
);
2541 *memory_access_type
= VMAT_INVARIANT
;
2542 /* Invariant accesses perform only component accesses, alignment
2543 is irrelevant for them. */
2544 *alignment_support_scheme
= dr_unaligned_supported
;
2549 *memory_access_type
= get_negative_load_store_type
2550 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2552 *memory_access_type
= VMAT_CONTIGUOUS
;
2553 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2555 *alignment_support_scheme
2556 = vect_supportable_dr_alignment (vinfo
,
2557 STMT_VINFO_DR_INFO (stmt_info
),
2558 vectype
, *misalignment
);
2562 if ((*memory_access_type
== VMAT_ELEMENTWISE
2563 || *memory_access_type
== VMAT_STRIDED_SLP
)
2564 && !nunits
.is_constant ())
2566 if (dump_enabled_p ())
2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2568 "Not using elementwise accesses due to variable "
2569 "vectorization factor.\n");
2573 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2577 "unsupported unaligned access\n");
2581 /* FIXME: At the moment the cost model seems to underestimate the
2582 cost of using elementwise accesses. This check preserves the
2583 traditional behavior until that can be fixed. */
2584 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2585 if (!first_stmt_info
)
2586 first_stmt_info
= stmt_info
;
2587 if (*memory_access_type
== VMAT_ELEMENTWISE
2588 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2589 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2590 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2591 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2595 "not falling back to elementwise accesses\n");
2601 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2602 conditional operation STMT_INFO. When returning true, store the mask
2603 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2604 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2605 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2608 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2609 slp_tree slp_node
, unsigned mask_index
,
2610 tree
*mask
, slp_tree
*mask_node
,
2611 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2613 enum vect_def_type mask_dt
;
2615 slp_tree mask_node_1
;
2616 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2617 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2621 "mask use not simple.\n");
2625 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2627 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2629 "mask argument is not a boolean.\n");
2633 /* If the caller is not prepared for adjusting an external/constant
2634 SLP mask vector type fail. */
2637 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2641 "SLP mask argument is not vectorized.\n");
2645 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2647 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2649 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2653 "could not find an appropriate vector mask type.\n");
2657 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2658 TYPE_VECTOR_SUBPARTS (vectype
)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2662 "vector mask type %T"
2663 " does not match vector data type %T.\n",
2664 mask_vectype
, vectype
);
2669 *mask_dt_out
= mask_dt
;
2670 *mask_vectype_out
= mask_vectype
;
2672 *mask_node
= mask_node_1
;
2676 /* Return true if stored value RHS is suitable for vectorizing store
2677 statement STMT_INFO. When returning true, store the type of the
2678 definition in *RHS_DT_OUT, the type of the vectorized store value in
2679 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2682 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2683 slp_tree slp_node
, tree rhs
,
2684 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2685 vec_load_store_type
*vls_type_out
)
2687 /* In the case this is a store from a constant make sure
2688 native_encode_expr can handle it. */
2689 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2693 "cannot encode constant as a byte sequence.\n");
2698 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2700 if (gimple_call_internal_p (call
)
2701 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2702 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2705 enum vect_def_type rhs_dt
;
2708 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2709 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2713 "use not simple.\n");
2717 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2718 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2722 "incompatible vector types.\n");
2726 *rhs_dt_out
= rhs_dt
;
2727 *rhs_vectype_out
= rhs_vectype
;
2728 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2729 *vls_type_out
= VLS_STORE_INVARIANT
;
2731 *vls_type_out
= VLS_STORE
;
2735 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2736 Note that we support masks with floating-point type, in which case the
2737 floats are interpreted as a bitmask. */
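/* For example (illustrative): for a floating-point mask type the all-ones
   value is obtained by reinterpreting an all-ones integer bit pattern as a
   real via real_from_target and splatting it, so every lane of the "mask"
   vector has all bits set.  */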
2740 vect_build_all_ones_mask (vec_info
*vinfo
,
2741 stmt_vec_info stmt_info
, tree masktype
)
2743 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2744 return build_int_cst (masktype
, -1);
2745 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2747 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2748 mask
= build_vector_from_val (masktype
, mask
);
2749 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2755 for (int j
= 0; j
< 6; ++j
)
2757 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2758 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2759 mask
= build_vector_from_val (masktype
, mask
);
2760 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2765 /* Build an all-zero merge value of type VECTYPE while vectorizing
2766 STMT_INFO as a gather load. */
2769 vect_build_zero_merge_argument (vec_info
*vinfo
,
2770 stmt_vec_info stmt_info
, tree vectype
)
2773 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2774 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2775 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2779 for (int j
= 0; j
< 6; ++j
)
2781 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2782 merge
= build_real (TREE_TYPE (vectype
), r
);
2786 merge
= build_vector_from_val (vectype
, merge
);
2787 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2797 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2798 gimple_stmt_iterator
*gsi
,
2800 gather_scatter_info
*gs_info
,
2803 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2804 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2805 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2806 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2807 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2808 edge pe
= loop_preheader_edge (loop
);
2809 enum { NARROW
, NONE
, WIDEN
} modifier
;
2810 poly_uint64 gather_off_nunits
2811 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2813 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2814 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2815 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2816 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2817 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2818 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2819 tree scaletype
= TREE_VALUE (arglist
);
2820 tree real_masktype
= masktype
;
2821 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2823 || TREE_CODE (masktype
) == INTEGER_TYPE
2824 || types_compatible_p (srctype
, masktype
)));
2826 masktype
= truth_type_for (srctype
);
2828 tree mask_halftype
= masktype
;
2829 tree perm_mask
= NULL_TREE
;
2830 tree mask_perm_mask
= NULL_TREE
;
2831 if (known_eq (nunits
, gather_off_nunits
))
2833 else if (known_eq (nunits
* 2, gather_off_nunits
))
2837 /* Currently widening gathers and scatters are only supported for
2838 fixed-length vectors. */
2839 int count
= gather_off_nunits
.to_constant ();
2840 vec_perm_builder
sel (count
, count
, 1);
2841 for (int i
= 0; i
< count
; ++i
)
2842 sel
.quick_push (i
| (count
/ 2));
2844 vec_perm_indices
indices (sel
, 1, count
);
2845 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2848 else if (known_eq (nunits
, gather_off_nunits
* 2))
2852 /* Currently narrowing gathers and scatters are only supported for
2853 fixed-length vectors. */
2854 int count
= nunits
.to_constant ();
2855 vec_perm_builder
sel (count
, count
, 1);
2856 sel
.quick_grow (count
);
2857 for (int i
= 0; i
< count
; ++i
)
2858 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2859 vec_perm_indices
indices (sel
, 2, count
);
2860 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2864 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2866 for (int i
= 0; i
< count
; ++i
)
2867 sel
[i
] = i
| (count
/ 2);
2868 indices
.new_vector (sel
, 2, count
);
2869 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2872 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2877 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2878 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2880 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2881 if (!is_gimple_min_invariant (ptr
))
2884 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2885 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2886 gcc_assert (!new_bb
);
2889 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2891 tree vec_oprnd0
= NULL_TREE
;
2892 tree vec_mask
= NULL_TREE
;
2893 tree src_op
= NULL_TREE
;
2894 tree mask_op
= NULL_TREE
;
2895 tree prev_res
= NULL_TREE
;
2899 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2900 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2903 auto_vec
<tree
> vec_oprnds0
;
2904 auto_vec
<tree
> vec_masks
;
2905 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2906 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2907 gs_info
->offset
, &vec_oprnds0
);
2909 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2910 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2911 mask
, &vec_masks
, masktype
);
2912 for (int j
= 0; j
< ncopies
; ++j
)
2915 if (modifier
== WIDEN
&& (j
& 1))
2916 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2917 perm_mask
, stmt_info
, gsi
);
2919 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2921 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2924 TYPE_VECTOR_SUBPARTS (idxtype
)));
2925 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2926 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2927 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2928 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2934 if (mask_perm_mask
&& (j
& 1))
2935 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2936 mask_perm_mask
, stmt_info
, gsi
);
2939 if (modifier
== NARROW
)
2942 vec_mask
= vec_masks
[j
/ 2];
2945 vec_mask
= vec_masks
[j
];
2948 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2950 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2951 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2952 gcc_assert (known_eq (sub1
, sub2
));
2953 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2954 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2956 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2957 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2961 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2963 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2965 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2966 : VEC_UNPACK_LO_EXPR
,
2968 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2974 tree mask_arg
= mask_op
;
2975 if (masktype
!= real_masktype
)
2977 tree utype
, optype
= TREE_TYPE (mask_op
);
2978 if (VECTOR_TYPE_P (real_masktype
)
2979 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2980 utype
= real_masktype
;
2982 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2983 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2984 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2986 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2987 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2989 if (!useless_type_conversion_p (real_masktype
, utype
))
2991 gcc_assert (TYPE_PRECISION (utype
)
2992 <= TYPE_PRECISION (real_masktype
));
2993 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2994 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2995 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2998 src_op
= build_zero_cst (srctype
);
3000 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
3003 if (!useless_type_conversion_p (vectype
, rettype
))
3005 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
3006 TYPE_VECTOR_SUBPARTS (rettype
)));
3007 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
3008 gimple_call_set_lhs (new_stmt
, op
);
3009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3010 var
= make_ssa_name (vec_dest
);
3011 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
3012 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
3013 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3017 var
= make_ssa_name (vec_dest
, new_stmt
);
3018 gimple_call_set_lhs (new_stmt
, var
);
3019 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3022 if (modifier
== NARROW
)
3029 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3031 new_stmt
= SSA_NAME_DEF_STMT (var
);
3034 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3036 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
3046 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
3047 class loop
*loop
, stmt_vec_info stmt_info
,
3048 slp_tree slp_node
, gather_scatter_info
*gs_info
,
3049 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
3051 gimple_seq stmts
= NULL
;
3052 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3056 edge pe
= loop_preheader_edge (loop
);
3057 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3058 gcc_assert (!new_bb
);
3061 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
3065 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
3066 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
3067 gs_info
->offset
, vec_offset
,
3068 gs_info
->offset_vectype
);
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
			     build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
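/* Worked example (illustrative numbers only): with DR_STEP = 32 bytes,
   SCALE = 8 and a 4-element offset vector, X = 32 / 8 = 4, so *VEC_OFFSET
   becomes { 0, 4, 8, 12 } and *DATAREF_BUMP is 32 * 4 = 128 bytes per copy
   of the vectorized statement.  */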
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
static tree
vect_get_data_ptr_increment (vec_info *vinfo,
			     dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
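/* For example (illustrative): with AGGR_TYPE = V4SI the increment is
   TYPE_SIZE_UNIT (V4SI) = 16 bytes, negated to -16 when the data reference
   steps backwards, and zero for VMAT_INVARIANT accesses.  */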
3130 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3133 vectorizable_bswap (vec_info
*vinfo
,
3134 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3135 gimple
**vec_stmt
, slp_tree slp_node
,
3137 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3140 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3141 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3144 op
= gimple_call_arg (stmt
, 0);
3145 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3146 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3148 /* Multiple types in SLP are handled by creating the appropriate number of
3149 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3154 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3156 gcc_assert (ncopies
>= 1);
3158 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3162 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3163 unsigned word_bytes
;
3164 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3167 /* The encoding uses one stepped pattern for each byte in the word. */
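  /* For example (illustrative): with a 4-byte word the selector pushed below
     is { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8, ... }, i.e. the bytes of
     each word are reversed in place.  */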
3168 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3169 for (unsigned i
= 0; i
< 3; ++i
)
3170 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3171 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3173 vec_perm_indices
indices (elts
, 1, num_bytes
);
3174 machine_mode vmode
= TYPE_MODE (char_vectype
);
3175 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3181 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3185 "incompatible vector types for invariants\n");
3189 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3190 DUMP_VECT_SCOPE ("vectorizable_bswap");
3191 record_stmt_cost (cost_vec
,
3192 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3193 record_stmt_cost (cost_vec
,
3195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3196 vec_perm
, stmt_info
, 0, vect_body
);
3200 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3203 vec
<tree
> vec_oprnds
= vNULL
;
3204 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
      /* Arguments are ready.  Create the new vector stmt.  */
3209 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3212 tree tem
= make_ssa_name (char_vectype
);
3213 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3214 char_vectype
, vop
));
3215 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3216 tree tem2
= make_ssa_name (char_vectype
);
3217 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3218 tem
, tem
, bswap_vconst
);
3219 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3220 tem
= make_ssa_name (vectype
);
3221 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3223 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3225 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3227 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3231 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3233 vec_oprnds
.release ();
3237 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3238 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3239 in a single step. On success, store the binary pack code in
3243 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3244 tree_code
*convert_code
)
3246 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3247 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3251 int multi_step_cvt
= 0;
3252 auto_vec
<tree
, 8> interm_types
;
3253 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3254 &code
, &multi_step_cvt
, &interm_types
)
3258 *convert_code
= code
;
3262 /* Function vectorizable_call.
3264 Check if STMT_INFO performs a function call that can be vectorized.
3265 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3266 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3267 Return true if STMT_INFO is vectorizable in this way. */
3270 vectorizable_call (vec_info
*vinfo
,
3271 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3272 gimple
**vec_stmt
, slp_tree slp_node
,
3273 stmt_vector_for_cost
*cost_vec
)
3279 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3280 tree vectype_out
, vectype_in
;
3281 poly_uint64 nunits_in
;
3282 poly_uint64 nunits_out
;
3283 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3284 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3285 tree fndecl
, new_temp
, rhs_type
;
3286 enum vect_def_type dt
[4]
3287 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3288 vect_unknown_def_type
};
3289 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3290 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3291 int ndts
= ARRAY_SIZE (dt
);
3293 auto_vec
<tree
, 8> vargs
;
3294 enum { NARROW
, NONE
, WIDEN
} modifier
;
3298 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3301 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3305 /* Is STMT_INFO a vectorizable call? */
3306 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3310 if (gimple_call_internal_p (stmt
)
3311 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3312 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3313 /* Handled by vectorizable_load and vectorizable_store. */
3316 if (gimple_call_lhs (stmt
) == NULL_TREE
3317 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3320 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3322 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3324 /* Process function arguments. */
3325 rhs_type
= NULL_TREE
;
3326 vectype_in
= NULL_TREE
;
3327 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is also not good.  */
3332 if (nargs
== 0 || nargs
> 4)
3335 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3336 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3337 if (cfn
== CFN_GOMP_SIMD_LANE
)
3340 rhs_type
= unsigned_type_node
;
3344 if (internal_fn_p (cfn
))
3345 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3347 for (i
= 0; i
< nargs
; i
++)
3349 if ((int) i
== mask_opno
)
3351 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3352 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3357 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3358 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3362 "use not simple.\n");
3366 /* We can only handle calls with arguments of the same type. */
3368 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3372 "argument types differ.\n");
3376 rhs_type
= TREE_TYPE (op
);
3379 vectype_in
= vectypes
[i
];
3380 else if (vectypes
[i
]
3381 && !types_compatible_p (vectypes
[i
], vectype_in
))
3383 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3385 "argument vector types differ.\n");
3389 /* If all arguments are external or constant defs, infer the vector type
3390 from the scalar type. */
3392 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3394 gcc_assert (vectype_in
);
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3399 "no vectype for scalar type %T\n", rhs_type
);
3403 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3404 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3405 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3406 by a pack of the two vectors into an SI vector. We would need
3407 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3408 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3412 "mismatched vector sizes %T and %T\n",
3413 vectype_in
, vectype_out
);
3417 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3418 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3422 "mixed mask and nonmask vector types\n");
3426 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3430 "use emulated vector type for call\n");
3435 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3436 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3437 if (known_eq (nunits_in
* 2, nunits_out
))
3439 else if (known_eq (nunits_out
, nunits_in
))
3441 else if (known_eq (nunits_out
* 2, nunits_in
))
3446 /* We only handle functions that do not read or clobber memory. */
3447 if (gimple_vuse (stmt
))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3451 "function reads from or writes to memory.\n");
3455 /* For now, we only vectorize functions if a target specific builtin
3456 is available. TODO -- in some cases, it might be profitable to
3457 insert the calls for pieces of the vector, in order to be able
3458 to vectorize other operations in the loop. */
3460 internal_fn ifn
= IFN_LAST
;
3461 tree callee
= gimple_call_fndecl (stmt
);
3463 /* First try using an internal function. */
3464 tree_code convert_code
= ERROR_MARK
;
3466 && (modifier
== NONE
3467 || (modifier
== NARROW
3468 && simple_integer_narrowing (vectype_out
, vectype_in
,
3470 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3473 /* If that fails, try asking for a target-specific built-in function. */
3474 if (ifn
== IFN_LAST
)
3476 if (cfn
!= CFN_LAST
)
3477 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3478 (cfn
, vectype_out
, vectype_in
);
3479 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3480 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3481 (callee
, vectype_out
, vectype_in
);
3484 if (ifn
== IFN_LAST
&& !fndecl
)
3486 if (cfn
== CFN_GOMP_SIMD_LANE
3489 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3490 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3491 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3492 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3494 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3495 { 0, 1, 2, ... vf - 1 } vector. */
3496 gcc_assert (nargs
== 0);
3498 else if (modifier
== NONE
3499 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3500 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3501 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3502 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3503 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3504 slp_op
, vectype_in
, cost_vec
);
3507 if (dump_enabled_p ())
3508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3509 "function is not vectorizable.\n");
3516 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3517 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3519 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies
>= 1);
3525 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3526 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3527 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3528 if (!vec_stmt
) /* transformation not required. */
3531 for (i
= 0; i
< nargs
; ++i
)
3532 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3534 ? vectypes
[i
] : vectype_in
))
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3538 "incompatible vector types for invariants\n");
3541 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3542 DUMP_VECT_SCOPE ("vectorizable_call");
3543 vect_model_simple_cost (vinfo
, stmt_info
,
3544 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3545 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3546 record_stmt_cost (cost_vec
, ncopies
/ 2,
3547 vec_promote_demote
, stmt_info
, 0, vect_body
);
3550 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3551 && (reduc_idx
>= 0 || mask_opno
>= 0))
3554 && (cond_fn
== IFN_LAST
3555 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3556 OPTIMIZE_FOR_SPEED
)))
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3560 "can't use a fully-masked loop because no"
3561 " conditional operation is available.\n");
3562 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3566 unsigned int nvectors
3568 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3570 tree scalar_mask
= NULL_TREE
;
3572 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3573 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3574 vectype_out
, scalar_mask
);
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3586 scalar_dest
= gimple_call_lhs (stmt
);
3587 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3589 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3590 unsigned int vect_nargs
= nargs
;
3591 if (masked_loop_p
&& reduc_idx
>= 0)
3597 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3599 tree prev_res
= NULL_TREE
;
3600 vargs
.safe_grow (vect_nargs
, true);
3601 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3602 for (j
= 0; j
< ncopies
; ++j
)
3604 /* Build argument list for the vectorized call. */
3607 vec
<tree
> vec_oprnds0
;
3609 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3610 vec_oprnds0
= vec_defs
[0];
3612 /* Arguments are ready. Create the new vector stmt. */
3613 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3616 if (masked_loop_p
&& reduc_idx
>= 0)
3618 unsigned int vec_num
= vec_oprnds0
.length ();
3619 /* Always true for SLP. */
3620 gcc_assert (ncopies
== 1);
3621 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3625 for (k
= 0; k
< nargs
; k
++)
3627 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3628 vargs
[varg
++] = vec_oprndsk
[i
];
3630 if (masked_loop_p
&& reduc_idx
>= 0)
3631 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3633 if (modifier
== NARROW
)
3635 /* We don't define any narrowing conditional functions
3637 gcc_assert (mask_opno
< 0);
3638 tree half_res
= make_ssa_name (vectype_in
);
3640 = gimple_build_call_internal_vec (ifn
, vargs
);
3641 gimple_call_set_lhs (call
, half_res
);
3642 gimple_call_set_nothrow (call
, true);
3643 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3646 prev_res
= half_res
;
3649 new_temp
= make_ssa_name (vec_dest
);
3650 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3651 prev_res
, half_res
);
3652 vect_finish_stmt_generation (vinfo
, stmt_info
,
3657 if (mask_opno
>= 0 && masked_loop_p
)
3659 unsigned int vec_num
= vec_oprnds0
.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies
== 1);
3662 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3664 vargs
[mask_opno
] = prepare_vec_mask
3665 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3666 vargs
[mask_opno
], gsi
);
3670 if (ifn
!= IFN_LAST
)
3671 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3673 call
= gimple_build_call_vec (fndecl
, vargs
);
3674 new_temp
= make_ssa_name (vec_dest
, call
);
3675 gimple_call_set_lhs (call
, new_temp
);
3676 gimple_call_set_nothrow (call
, true);
3677 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3680 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3686 if (masked_loop_p
&& reduc_idx
>= 0)
3687 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3689 for (i
= 0; i
< nargs
; i
++)
3691 op
= gimple_call_arg (stmt
, i
);
3694 vec_defs
.quick_push (vNULL
);
3695 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3699 vargs
[varg
++] = vec_defs
[i
][j
];
3701 if (masked_loop_p
&& reduc_idx
>= 0)
3702 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3704 if (mask_opno
>= 0 && masked_loop_p
)
3706 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3709 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3710 vargs
[mask_opno
], gsi
);
3714 if (cfn
== CFN_GOMP_SIMD_LANE
)
3716 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3718 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3719 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3720 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3721 new_temp
= make_ssa_name (vec_dest
);
3722 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3723 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3725 else if (modifier
== NARROW
)
/* We don't define any narrowing conditional functions at
   present.  */
gcc_assert (mask_opno < 0);
3730 tree half_res
= make_ssa_name (vectype_in
);
3731 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3732 gimple_call_set_lhs (call
, half_res
);
3733 gimple_call_set_nothrow (call
, true);
3734 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3737 prev_res
= half_res
;
3740 new_temp
= make_ssa_name (vec_dest
);
3741 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3742 prev_res
, half_res
);
3743 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3748 if (ifn
!= IFN_LAST
)
3749 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3751 call
= gimple_build_call_vec (fndecl
, vargs
);
3752 new_temp
= make_ssa_name (vec_dest
, call
);
3753 gimple_call_set_lhs (call
, new_temp
);
3754 gimple_call_set_nothrow (call
, true);
3755 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3759 if (j
== (modifier
== NARROW
? 1 : 0))
3760 *vec_stmt
= new_stmt
;
3761 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3763 for (i
= 0; i
< nargs
; i
++)
3765 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3766 vec_oprndsi
.release ();
3769 else if (modifier
== NARROW
)
3771 auto_vec
<vec
<tree
> > vec_defs (nargs
);
/* We don't define any narrowing conditional functions at present.  */
gcc_assert (mask_opno < 0);
3774 for (j
= 0; j
< ncopies
; ++j
)
3776 /* Build argument list for the vectorized call. */
3778 vargs
.create (nargs
* 2);
3784 vec
<tree
> vec_oprnds0
;
3786 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3787 vec_oprnds0
= vec_defs
[0];
3789 /* Arguments are ready. Create the new vector stmt. */
3790 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3794 for (k
= 0; k
< nargs
; k
++)
3796 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3797 vargs
.quick_push (vec_oprndsk
[i
]);
3798 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3801 if (ifn
!= IFN_LAST
)
3802 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3804 call
= gimple_build_call_vec (fndecl
, vargs
);
3805 new_temp
= make_ssa_name (vec_dest
, call
);
3806 gimple_call_set_lhs (call
, new_temp
);
3807 gimple_call_set_nothrow (call
, true);
3808 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3809 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3814 for (i
= 0; i
< nargs
; i
++)
3816 op
= gimple_call_arg (stmt
, i
);
3819 vec_defs
.quick_push (vNULL
);
3820 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3821 op
, &vec_defs
[i
], vectypes
[i
]);
3823 vec_oprnd0
= vec_defs
[i
][2*j
];
3824 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3826 vargs
.quick_push (vec_oprnd0
);
3827 vargs
.quick_push (vec_oprnd1
);
3830 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3831 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3832 gimple_call_set_lhs (new_stmt
, new_temp
);
3833 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3835 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3839 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3841 for (i
= 0; i
< nargs
; i
++)
3843 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3844 vec_oprndsi
.release ();
/* No current target implements this case.  */

/* The call in STMT might prevent it from being removed in dce.
   We however cannot remove it here, due to the way the ssa name
   it defines is mapped to the new definition.  So just replace
   rhs of the statement with something harmless.  */
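/* For instance (sketch, not from this file), once the vector call has
   been emitted, a scalar statement such as

     x_1 = foo (a_2);

   is rewritten to

     x_1 = 0;

   so the SSA name keeps a harmless definition until dce removes it.  */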
3861 stmt_info
= vect_orig_stmt (stmt_info
);
3862 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3865 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3866 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3872 struct simd_call_arg_info
3876 HOST_WIDE_INT linear_step
;
3877 enum vect_def_type dt
;
3879 bool simd_lane_linear
;
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */
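/* Illustrative sketch (assumed source, not from this file): for an
   addressable variable privatized in an "omp simd" loop, e.g.

     #pragma omp simd private (x)
     for (i = 0; i < n; i++)
       use (&x);

   the variable is laid out as an array indexed by .GOMP_SIMD_LANE, so
   &x advances by a constant step from one simd lane to the next even
   though it is not linear in the loop as a whole; that step is what
   this helper records in ARGINFO.  */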
3887 vect_simd_lane_linear (tree op
, class loop
*loop
,
3888 struct simd_call_arg_info
*arginfo
)
3890 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3892 if (!is_gimple_assign (def_stmt
)
3893 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3894 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3897 tree base
= gimple_assign_rhs1 (def_stmt
);
3898 HOST_WIDE_INT linear_step
= 0;
3899 tree v
= gimple_assign_rhs2 (def_stmt
);
3900 while (TREE_CODE (v
) == SSA_NAME
)
3903 def_stmt
= SSA_NAME_DEF_STMT (v
);
3904 if (is_gimple_assign (def_stmt
))
3905 switch (gimple_assign_rhs_code (def_stmt
))
3908 t
= gimple_assign_rhs2 (def_stmt
);
3909 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3911 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3912 v
= gimple_assign_rhs1 (def_stmt
);
3915 t
= gimple_assign_rhs2 (def_stmt
);
3916 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3918 linear_step
= tree_to_shwi (t
);
3919 v
= gimple_assign_rhs1 (def_stmt
);
3922 t
= gimple_assign_rhs1 (def_stmt
);
3923 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3924 || (TYPE_PRECISION (TREE_TYPE (v
))
3925 < TYPE_PRECISION (TREE_TYPE (t
))))
3934 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3936 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3937 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3942 arginfo
->linear_step
= linear_step
;
3944 arginfo
->simd_lane_linear
= true;
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */
3954 static unsigned HOST_WIDE_INT
3955 simd_clone_subparts (tree vectype
)
3957 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
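/* Illustrative sketch (assumed user code, not from this file): given

     #pragma omp declare simd
     float foo (float x, int y);

     for (i = 0; i < n; i++)
       r[i] = foo (a[i], 3);

   the loop body can be vectorized by calling one of the simd clones the
   compiler generated for foo (e.g. a clone taking a vector of floats and
   a uniform int) instead of widening the call itself.  */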
3969 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3970 gimple_stmt_iterator
*gsi
,
3971 gimple
**vec_stmt
, slp_tree slp_node
,
3972 stmt_vector_for_cost
*)
3977 tree vec_oprnd0
= NULL_TREE
;
3980 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3981 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3982 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3983 tree fndecl
, new_temp
;
3985 auto_vec
<simd_call_arg_info
> arginfo
;
3986 vec
<tree
> vargs
= vNULL
;
3988 tree lhs
, rtype
, ratype
;
3989 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3991 /* Is STMT a vectorizable call? */
3992 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3996 fndecl
= gimple_call_fndecl (stmt
);
3997 if (fndecl
== NULL_TREE
)
4000 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4001 if (node
== NULL
|| node
->simd_clones
== NULL
)
4004 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4007 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4011 if (gimple_call_lhs (stmt
)
4012 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4015 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4017 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4019 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4026 /* Process function arguments. */
4027 nargs
= gimple_call_num_args (stmt
);
4029 /* Bail out if the function has zero arguments. */
4033 arginfo
.reserve (nargs
, true);
4035 for (i
= 0; i
< nargs
; i
++)
4037 simd_call_arg_info thisarginfo
;
4040 thisarginfo
.linear_step
= 0;
4041 thisarginfo
.align
= 0;
4042 thisarginfo
.op
= NULL_TREE
;
4043 thisarginfo
.simd_lane_linear
= false;
4045 op
= gimple_call_arg (stmt
, i
);
4046 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4047 &thisarginfo
.vectype
)
4048 || thisarginfo
.dt
== vect_uninitialized_def
)
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4052 "use not simple.\n");
4056 if (thisarginfo
.dt
== vect_constant_def
4057 || thisarginfo
.dt
== vect_external_def
)
4058 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4061 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4062 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4066 "vector mask arguments are not supported\n");
/* For linear arguments, the analyze phase should have saved
   the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
4073 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4074 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4076 gcc_assert (vec_stmt
);
4077 thisarginfo
.linear_step
4078 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4080 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4081 thisarginfo
.simd_lane_linear
4082 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4083 == boolean_true_node
);
4084 /* If loop has been peeled for alignment, we need to adjust it. */
4085 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4086 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4087 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4089 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4090 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4091 tree opt
= TREE_TYPE (thisarginfo
.op
);
4092 bias
= fold_convert (TREE_TYPE (step
), bias
);
4093 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4095 = fold_build2 (POINTER_TYPE_P (opt
)
4096 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4097 thisarginfo
.op
, bias
);
4101 && thisarginfo
.dt
!= vect_constant_def
4102 && thisarginfo
.dt
!= vect_external_def
4104 && TREE_CODE (op
) == SSA_NAME
4105 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4107 && tree_fits_shwi_p (iv
.step
))
4109 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4110 thisarginfo
.op
= iv
.base
;
4112 else if ((thisarginfo
.dt
== vect_constant_def
4113 || thisarginfo
.dt
== vect_external_def
)
4114 && POINTER_TYPE_P (TREE_TYPE (op
)))
4115 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
/* Addresses of array elements indexed by GOMP_SIMD_LANE are
   linear too.  */
if (POINTER_TYPE_P (TREE_TYPE (op
))
4119 && !thisarginfo
.linear_step
4121 && thisarginfo
.dt
!= vect_constant_def
4122 && thisarginfo
.dt
!= vect_external_def
4125 && TREE_CODE (op
) == SSA_NAME
)
4126 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4128 arginfo
.quick_push (thisarginfo
);
4131 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4132 if (!vf
.is_constant ())
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4136 "not considering SIMD clones; not yet supported"
4137 " for variable-width vectors.\n");
4141 unsigned int badness
= 0;
4142 struct cgraph_node
*bestn
= NULL
;
4143 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4144 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4146 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4147 n
= n
->simdclone
->next_clone
)
4149 unsigned int this_badness
= 0;
4150 unsigned int num_calls
;
4151 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4152 || n
->simdclone
->nargs
!= nargs
)
4155 this_badness
+= exact_log2 (num_calls
) * 4096;
4156 if (n
->simdclone
->inbranch
)
4157 this_badness
+= 8192;
4158 int target_badness
= targetm
.simd_clone
.usable (n
);
4159 if (target_badness
< 0)
4161 this_badness
+= target_badness
* 512;
4162 /* FORNOW: Have to add code to add the mask argument. */
4163 if (n
->simdclone
->inbranch
)
4165 for (i
= 0; i
< nargs
; i
++)
4167 switch (n
->simdclone
->args
[i
].arg_type
)
4169 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4170 if (!useless_type_conversion_p
4171 (n
->simdclone
->args
[i
].orig_type
,
4172 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4174 else if (arginfo
[i
].dt
== vect_constant_def
4175 || arginfo
[i
].dt
== vect_external_def
4176 || arginfo
[i
].linear_step
)
4179 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4180 if (arginfo
[i
].dt
!= vect_constant_def
4181 && arginfo
[i
].dt
!= vect_external_def
)
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4186 if (arginfo
[i
].dt
== vect_constant_def
4187 || arginfo
[i
].dt
== vect_external_def
4188 || (arginfo
[i
].linear_step
4189 != n
->simdclone
->args
[i
].linear_step
))
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4201 case SIMD_CLONE_ARG_TYPE_MASK
:
4204 if (i
== (size_t) -1)
4206 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4211 if (arginfo
[i
].align
)
4212 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4213 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4215 if (i
== (size_t) -1)
4217 if (bestn
== NULL
|| this_badness
< badness
)
4220 badness
= this_badness
;
4227 for (i
= 0; i
< nargs
; i
++)
4228 if ((arginfo
[i
].dt
== vect_constant_def
4229 || arginfo
[i
].dt
== vect_external_def
)
4230 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4232 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4233 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4235 if (arginfo
[i
].vectype
== NULL
4236 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4237 simd_clone_subparts (arginfo
[i
].vectype
)))
4241 fndecl
= bestn
->decl
;
4242 nunits
= bestn
->simdclone
->simdlen
;
4243 ncopies
= vector_unroll_factor (vf
, nunits
);
/* If the function isn't const, only allow it in simd loops where user
   has asserted that at least nunits consecutive iterations can be
   performed using SIMD instructions.  */
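/* Sketch of the user-level guarantee this relies on (not from this
   file): a non-const function may touch memory, so its clone is only
   called here when the loop was written e.g. as

     #pragma omp simd safelen(8)
     for (i = 0; i < n; i++)
       a[i] = foo (a[i]);

   asserting that that many consecutive iterations may be executed
   as if in parallel.  */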
4248 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4249 && gimple_vuse (stmt
))
/* Sanity check: make sure that at least one copy of the vectorized stmt
   needs to be generated.  */
4254 gcc_assert (ncopies
>= 1);
4256 if (!vec_stmt
) /* transformation not required. */
/* When the original call is pure or const but the SIMD ABI dictates
   an aggregate return we will have to use a virtual definition and
   in a loop eventually even need to add a virtual PHI.  That's
   not straight-forward so allow to fix this up via renaming.  */
4262 if (gimple_call_lhs (stmt
)
4263 && !gimple_vdef (stmt
)
4264 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4265 vinfo
->any_known_not_updated_vssa
= true;
4266 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4267 for (i
= 0; i
< nargs
; i
++)
4268 if ((bestn
->simdclone
->args
[i
].arg_type
4269 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4270 || (bestn
->simdclone
->args
[i
].arg_type
4271 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4273 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4276 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4277 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4278 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4279 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4280 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4281 tree sll
= arginfo
[i
].simd_lane_linear
4282 ? boolean_true_node
: boolean_false_node
;
4283 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4285 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4286 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/* vect_model_simple_cost (vinfo, stmt_info, ncopies,
   dt, slp_node, cost_vec); */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4298 scalar_dest
= gimple_call_lhs (stmt
);
4299 vec_dest
= NULL_TREE
;
4304 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4305 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4306 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4309 rtype
= TREE_TYPE (ratype
);
4313 auto_vec
<vec
<tree
> > vec_oprnds
;
4314 auto_vec
<unsigned> vec_oprnds_i
;
4315 vec_oprnds
.safe_grow_cleared (nargs
, true);
4316 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4317 for (j
= 0; j
< ncopies
; ++j
)
4319 /* Build argument list for the vectorized call. */
4321 vargs
.create (nargs
);
4325 for (i
= 0; i
< nargs
; i
++)
4327 unsigned int k
, l
, m
, o
;
4329 op
= gimple_call_arg (stmt
, i
);
4330 switch (bestn
->simdclone
->args
[i
].arg_type
)
4332 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4333 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4334 o
= vector_unroll_factor (nunits
,
4335 simd_clone_subparts (atype
));
4336 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4338 if (simd_clone_subparts (atype
)
4339 < simd_clone_subparts (arginfo
[i
].vectype
))
4341 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4342 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4343 / simd_clone_subparts (atype
));
4344 gcc_assert ((k
& (k
- 1)) == 0);
4347 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4348 ncopies
* o
/ k
, op
,
4350 vec_oprnds_i
[i
] = 0;
4351 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4355 vec_oprnd0
= arginfo
[i
].op
;
4356 if ((m
& (k
- 1)) == 0)
4357 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4359 arginfo
[i
].op
= vec_oprnd0
;
4361 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4363 bitsize_int ((m
& (k
- 1)) * prec
));
4365 = gimple_build_assign (make_ssa_name (atype
),
4367 vect_finish_stmt_generation (vinfo
, stmt_info
,
4369 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4373 k
= (simd_clone_subparts (atype
)
4374 / simd_clone_subparts (arginfo
[i
].vectype
));
4375 gcc_assert ((k
& (k
- 1)) == 0);
4376 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4378 vec_alloc (ctor_elts
, k
);
4381 for (l
= 0; l
< k
; l
++)
4383 if (m
== 0 && l
== 0)
4385 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4389 vec_oprnds_i
[i
] = 0;
4390 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4393 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4394 arginfo
[i
].op
= vec_oprnd0
;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4405 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4407 = gimple_build_assign (make_ssa_name (atype
),
4409 vect_finish_stmt_generation (vinfo
, stmt_info
,
4411 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4414 vargs
.safe_push (vec_oprnd0
);
4417 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4419 = gimple_build_assign (make_ssa_name (atype
),
4421 vect_finish_stmt_generation (vinfo
, stmt_info
,
4423 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4428 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4429 vargs
.safe_push (op
);
4431 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4432 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4437 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4438 &stmts
, true, NULL_TREE
);
4442 edge pe
= loop_preheader_edge (loop
);
4443 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4444 gcc_assert (!new_bb
);
4446 if (arginfo
[i
].simd_lane_linear
)
4448 vargs
.safe_push (arginfo
[i
].op
);
4451 tree phi_res
= copy_ssa_name (op
);
4452 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4453 add_phi_arg (new_phi
, arginfo
[i
].op
,
4454 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4456 = POINTER_TYPE_P (TREE_TYPE (op
))
4457 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4458 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4459 ? sizetype
: TREE_TYPE (op
);
4461 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4463 tree tcst
= wide_int_to_tree (type
, cst
);
4464 tree phi_arg
= copy_ssa_name (op
);
4466 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4467 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4468 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4469 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4471 arginfo
[i
].op
= phi_res
;
4472 vargs
.safe_push (phi_res
);
4477 = POINTER_TYPE_P (TREE_TYPE (op
))
4478 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4479 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4480 ? sizetype
: TREE_TYPE (op
);
4482 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4484 tree tcst
= wide_int_to_tree (type
, cst
);
4485 new_temp
= make_ssa_name (TREE_TYPE (op
));
4487 = gimple_build_assign (new_temp
, code
,
4488 arginfo
[i
].op
, tcst
);
4489 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4490 vargs
.safe_push (new_temp
);
4493 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4494 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4495 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4496 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4497 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4498 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4504 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4508 || known_eq (simd_clone_subparts (rtype
), nunits
));
4510 new_temp
= create_tmp_var (ratype
);
4511 else if (useless_type_conversion_p (vectype
, rtype
))
4512 new_temp
= make_ssa_name (vec_dest
, new_call
);
4514 new_temp
= make_ssa_name (rtype
, new_call
);
4515 gimple_call_set_lhs (new_call
, new_temp
);
4517 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4518 gimple
*new_stmt
= new_call
;
4522 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4525 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4526 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4527 k
= vector_unroll_factor (nunits
,
4528 simd_clone_subparts (vectype
));
4529 gcc_assert ((k
& (k
- 1)) == 0);
4530 for (l
= 0; l
< k
; l
++)
4535 t
= build_fold_addr_expr (new_temp
);
4536 t
= build2 (MEM_REF
, vectype
, t
,
4537 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4540 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4541 bitsize_int (prec
), bitsize_int (l
* prec
));
4542 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4543 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4545 if (j
== 0 && l
== 0)
4546 *vec_stmt
= new_stmt
;
4547 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4551 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4554 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4556 unsigned int k
= (simd_clone_subparts (vectype
)
4557 / simd_clone_subparts (rtype
));
4558 gcc_assert ((k
& (k
- 1)) == 0);
4559 if ((j
& (k
- 1)) == 0)
4560 vec_alloc (ret_ctor_elts
, k
);
4564 o
= vector_unroll_factor (nunits
,
4565 simd_clone_subparts (rtype
));
4566 for (m
= 0; m
< o
; m
++)
4568 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4569 size_int (m
), NULL_TREE
, NULL_TREE
);
4570 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4572 vect_finish_stmt_generation (vinfo
, stmt_info
,
4574 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4575 gimple_assign_lhs (new_stmt
));
4577 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4580 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4581 if ((j
& (k
- 1)) != k
- 1)
4583 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4585 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4586 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4588 if ((unsigned) j
== k
- 1)
4589 *vec_stmt
= new_stmt
;
4590 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4595 tree t
= build_fold_addr_expr (new_temp
);
4596 t
= build2 (MEM_REF
, vectype
, t
,
4597 build_int_cst (TREE_TYPE (t
), 0));
4598 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4599 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4600 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4602 else if (!useless_type_conversion_p (vectype
, rtype
))
4604 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4606 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4607 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4612 *vec_stmt
= new_stmt
;
4613 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4616 for (i
= 0; i
< nargs
; ++i
)
4618 vec
<tree
> oprndsi
= vec_oprnds
[i
];
/* The call in STMT might prevent it from being removed in dce.
   We however cannot remove it here, due to the way the ssa name
   it defines is mapped to the new definition.  So just replace
   rhs of the statement with something harmless.  */
4634 type
= TREE_TYPE (scalar_dest
);
4635 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4636 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4639 new_stmt
= gimple_build_nop ();
4640 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4641 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
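/* For example (sketch, not from this file), widening a V8HI operand to
   two V4SI results is done by calling this helper twice, once with the
   "lo" code and once with the "hi" code:

     vect_lo = VEC_UNPACK_LO_EXPR <vect_in>;
     vect_hi = VEC_UNPACK_HI_EXPR <vect_in>;

   each call producing one half of the widened result.  */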
4657 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4658 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4659 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4660 stmt_vec_info stmt_info
)
4665 /* Generate half of the widened result: */
4666 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4667 if (op_type
!= binary_op
)
4669 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4670 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4671 gimple_assign_set_lhs (new_stmt
, new_temp
);
4672 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
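/* For example (sketch, not from this file), demoting four V4SI vectors
   towards V8HI combines them pairwise:

     tmp0 = VEC_PACK_TRUNC_EXPR <v0, v1>;
     tmp1 = VEC_PACK_TRUNC_EXPR <v2, v3>;

   and a multi-step conversion then recurses on tmp0/tmp1 to pack
   further (e.g. down to V16QI).  */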
4683 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4685 stmt_vec_info stmt_info
,
4686 vec
<tree
> &vec_dsts
,
4687 gimple_stmt_iterator
*gsi
,
4688 slp_tree slp_node
, enum tree_code code
)
4691 tree vop0
, vop1
, new_tmp
, vec_dest
;
4693 vec_dest
= vec_dsts
.pop ();
4695 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4697 /* Create demotion operation. */
4698 vop0
= (*vec_oprnds
)[i
];
4699 vop1
= (*vec_oprnds
)[i
+ 1];
4700 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4701 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4702 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4703 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4706 /* Store the resulting vector for next recursive call. */
4707 (*vec_oprnds
)[i
/2] = new_tmp
;
/* This is the last step of the conversion sequence.  Store the
   vectors in SLP_NODE or in vector info of the scalar statement
   (or in STMT_VINFO_RELATED_STMT chain).  */
4714 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4716 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
/* For multi-step demotion operations we first generate demotion operations
   from the source type to the intermediate types, and then combine the
   results (stored in VEC_OPRNDS) in demotion operation to the destination
   type.  */

/* At each level of recursion we have half of the operands we had at the
   previous level.  */
4728 vec_oprnds
->truncate ((i
+1)/2);
4729 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4731 stmt_info
, vec_dsts
, gsi
,
4732 slp_node
, VEC_PACK_TRUNC_EXPR
);
4735 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */
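/* For example (sketch, not from this file), a widening multiply of two
   V8HI operands produces two V4SI results per input pair:

     res_lo = WIDEN_MULT_LO_EXPR <v0, v1>;
     res_hi = WIDEN_MULT_HI_EXPR <v0, v1>;

   the pair being stored back into VEC_OPRNDS0 for a possible further
   promotion step.  */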
4745 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4746 vec
<tree
> *vec_oprnds0
,
4747 vec
<tree
> *vec_oprnds1
,
4748 stmt_vec_info stmt_info
, tree vec_dest
,
4749 gimple_stmt_iterator
*gsi
,
4750 enum tree_code code1
,
4751 enum tree_code code2
, int op_type
)
4754 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4755 gimple
*new_stmt1
, *new_stmt2
;
4756 vec
<tree
> vec_tmp
= vNULL
;
4758 vec_tmp
.create (vec_oprnds0
->length () * 2);
4759 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4761 if (op_type
== binary_op
)
4762 vop1
= (*vec_oprnds1
)[i
];
4766 /* Generate the two halves of promotion operation. */
4767 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4768 op_type
, vec_dest
, gsi
,
4770 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4771 op_type
, vec_dest
, gsi
,
4773 if (is_gimple_call (new_stmt1
))
4775 new_tmp1
= gimple_call_lhs (new_stmt1
);
4776 new_tmp2
= gimple_call_lhs (new_stmt2
);
4780 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4781 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4784 /* Store the results for the next step. */
4785 vec_tmp
.quick_push (new_tmp1
);
4786 vec_tmp
.quick_push (new_tmp2
);
4789 vec_oprnds0
->release ();
4790 *vec_oprnds0
= vec_tmp
;
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */
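/* Sketch (not from this file): when the input and output vectors have
   the same number of elements (e.g. V4HI -> V4SI), no unpacking is
   needed; each input is simply converted up

     tmp = (vector int) vop0;
     res = tmp OP widened_vop1;

   and the operation is carried out on the widened values.  */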
4796 vect_create_half_widening_stmts (vec_info
*vinfo
,
4797 vec
<tree
> *vec_oprnds0
,
4798 vec
<tree
> *vec_oprnds1
,
4799 stmt_vec_info stmt_info
, tree vec_dest
,
4800 gimple_stmt_iterator
*gsi
,
4801 enum tree_code code1
,
4809 vec
<tree
> vec_tmp
= vNULL
;
4811 vec_tmp
.create (vec_oprnds0
->length ());
4812 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4814 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
4816 gcc_assert (op_type
== binary_op
);
4817 vop1
= (*vec_oprnds1
)[i
];
4819 /* Widen the first vector input. */
4820 out_type
= TREE_TYPE (vec_dest
);
4821 new_tmp1
= make_ssa_name (out_type
);
4822 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4823 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
4824 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4826 /* Widen the second vector input. */
4827 new_tmp2
= make_ssa_name (out_type
);
4828 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4829 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4830 /* Perform the operation. With both vector inputs widened. */
4831 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4835 /* Perform the operation. With the single vector input widened. */
4836 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4839 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4840 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4841 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4843 /* Store the results for the next step. */
4844 vec_tmp
.quick_push (new_tmp3
);
4847 vec_oprnds0
->release ();
4848 *vec_oprnds0
= vec_tmp
;
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
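/* Typical scalar conversions this handles (illustrative, not from this
   file):

     int = (int) short_var;        WIDEN  - one input vector, two outputs
     short = (short) int_var;      NARROW - two input vectors, one output
     double = (double) float_var;  likewise a widening conversion
     unsigned = (unsigned) int_var;  NONE - same number of elements

   as well as the WIDEN_MULT/WIDEN_PLUS/... forms created by patterns.  */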
4858 vectorizable_conversion (vec_info
*vinfo
,
4859 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4860 gimple
**vec_stmt
, slp_tree slp_node
,
4861 stmt_vector_for_cost
*cost_vec
)
4865 tree op0
, op1
= NULL_TREE
;
4866 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4867 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4868 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4870 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4872 poly_uint64 nunits_in
;
4873 poly_uint64 nunits_out
;
4874 tree vectype_out
, vectype_in
;
4876 tree lhs_type
, rhs_type
;
4877 enum { NARROW
, NONE
, WIDEN
} modifier
;
4878 vec
<tree
> vec_oprnds0
= vNULL
;
4879 vec
<tree
> vec_oprnds1
= vNULL
;
4881 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4882 int multi_step_cvt
= 0;
4883 vec
<tree
> interm_types
= vNULL
;
4884 tree intermediate_type
, cvt_type
= NULL_TREE
;
4886 unsigned short fltsz
;
4888 /* Is STMT a vectorizable conversion? */
4890 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4893 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4897 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4901 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4904 code
= gimple_assign_rhs_code (stmt
);
4905 if (!CONVERT_EXPR_CODE_P (code
)
4906 && code
!= FIX_TRUNC_EXPR
4907 && code
!= FLOAT_EXPR
4908 && code
!= WIDEN_PLUS_EXPR
4909 && code
!= WIDEN_MINUS_EXPR
4910 && code
!= WIDEN_MULT_EXPR
4911 && code
!= WIDEN_LSHIFT_EXPR
)
4914 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4915 || code
== WIDEN_MINUS_EXPR
4916 || code
== WIDEN_MULT_EXPR
4917 || code
== WIDEN_LSHIFT_EXPR
);
4918 op_type
= TREE_CODE_LENGTH (code
);
4920 /* Check types of lhs and rhs. */
4921 scalar_dest
= gimple_assign_lhs (stmt
);
4922 lhs_type
= TREE_TYPE (scalar_dest
);
4923 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4925 /* Check the operands of the operation. */
4926 slp_tree slp_op0
, slp_op1
= NULL
;
4927 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4928 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4930 if (dump_enabled_p ())
4931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4932 "use not simple.\n");
4936 rhs_type
= TREE_TYPE (op0
);
4937 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4938 && !((INTEGRAL_TYPE_P (lhs_type
)
4939 && INTEGRAL_TYPE_P (rhs_type
))
4940 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4941 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4944 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4945 && ((INTEGRAL_TYPE_P (lhs_type
)
4946 && !type_has_mode_precision_p (lhs_type
))
4947 || (INTEGRAL_TYPE_P (rhs_type
)
4948 && !type_has_mode_precision_p (rhs_type
))))
4950 if (dump_enabled_p ())
4951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4952 "type conversion to/from bit-precision unsupported."
4957 if (op_type
== binary_op
)
4959 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4960 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4962 op1
= gimple_assign_rhs2 (stmt
);
4964 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4965 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4967 if (dump_enabled_p ())
4968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4969 "use not simple.\n");
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
   OP1.  */
4975 vectype_in
= vectype1_in
;
/* If op0 is an external or constant def, infer the vector type
   from the scalar type.  */
4981 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4983 gcc_assert (vectype_in
);
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4988 "no vectype for scalar type %T\n", rhs_type
);
4993 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4994 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4998 "can't convert between boolean and non "
4999 "boolean vectors %T\n", rhs_type
);
5004 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5005 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5006 if (known_eq (nunits_out
, nunits_in
))
5011 else if (multiple_p (nunits_out
, nunits_in
))
5015 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
/* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
5024 else if (modifier
== NARROW
)
5025 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5027 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
/* Sanity check: make sure that at least one copy of the vectorized stmt
   needs to be generated.  */
5031 gcc_assert (ncopies
>= 1);
5033 bool found_mode
= false;
5034 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5035 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5036 opt_scalar_mode rhs_mode_iter
;
5038 /* Supportable by target? */
5042 if (code
!= FIX_TRUNC_EXPR
5043 && code
!= FLOAT_EXPR
5044 && !CONVERT_EXPR_CODE_P (code
))
5046 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5052 "conversion not supported by target.\n");
5056 if (known_eq (nunits_in
, nunits_out
))
5058 if (!supportable_half_widening_operation (code
, vectype_out
,
5059 vectype_in
, &code1
))
5061 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5064 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5065 vectype_out
, vectype_in
, &code1
,
5066 &code2
, &multi_step_cvt
,
/* Binary widening operation can only be supported directly by the
   architecture.  */
gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5075 if (code
!= FLOAT_EXPR
5076 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5079 fltsz
= GET_MODE_SIZE (lhs_mode
);
5080 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5082 rhs_mode
= rhs_mode_iter
.require ();
5083 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5087 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5088 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5089 if (cvt_type
== NULL_TREE
)
5092 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5094 if (!supportable_convert_operation (code
, vectype_out
,
5095 cvt_type
, &codecvt1
))
5098 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5099 vectype_out
, cvt_type
,
5100 &codecvt1
, &codecvt2
,
5105 gcc_assert (multi_step_cvt
== 0);
5107 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5109 vectype_in
, &code1
, &code2
,
5110 &multi_step_cvt
, &interm_types
))
5120 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5121 codecvt2
= ERROR_MARK
;
5125 interm_types
.safe_push (cvt_type
);
5126 cvt_type
= NULL_TREE
;
5131 gcc_assert (op_type
== unary_op
);
5132 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5133 &code1
, &multi_step_cvt
,
5137 if (code
!= FIX_TRUNC_EXPR
5138 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5143 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5144 if (cvt_type
== NULL_TREE
)
5146 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5149 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5150 &code1
, &multi_step_cvt
,
5159 if (!vec_stmt
) /* transformation not required. */
5162 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5163 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5167 "incompatible vector types for invariants\n");
5170 DUMP_VECT_SCOPE ("vectorizable_conversion");
5171 if (modifier
== NONE
)
5173 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5174 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5177 else if (modifier
== NARROW
)
5179 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5180 /* The final packing step produces one vector result per copy. */
5181 unsigned int nvectors
5182 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5183 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5184 multi_step_cvt
, cost_vec
,
5189 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
/* The initial unpacking step produces two vector results
   per copy.  MULTI_STEP_CVT is 0 for a single conversion,
   so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
5193 unsigned int nvectors
5195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5197 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5198 multi_step_cvt
, cost_vec
,
5201 interm_types
.release ();
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE
, vect_location
,
5208 "transform conversion. ncopies = %d.\n", ncopies
);
5210 if (op_type
== binary_op
)
5212 if (CONSTANT_CLASS_P (op0
))
5213 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5214 else if (CONSTANT_CLASS_P (op1
))
5215 op1
= fold_convert (TREE_TYPE (op0
), op1
);
/* In case of multi-step conversion, we first generate conversion operations
   to the intermediate types, and then from those types to the final one.
   We create vector destinations for the intermediate type (TYPES) received
   from supportable_*_operation, and store them in the correct order
   for future use in vect_create_vectorized_*_stmts ().  */
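/* E.g. (sketch, not from this file) converting char to float on a
   target without a direct V16QI -> V4SF conversion goes through
   intermediate integer types:

     V16QI -> V8HI -> V4SI -> V4SF

   and one destination variable is created per intermediate type, in
   the order the transformation below expects to consume them.  */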
5223 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5224 vec_dest
= vect_create_destination_var (scalar_dest
,
5225 (cvt_type
&& modifier
== WIDEN
)
5226 ? cvt_type
: vectype_out
);
5227 vec_dsts
.quick_push (vec_dest
);
5231 for (i
= interm_types
.length () - 1;
5232 interm_types
.iterate (i
, &intermediate_type
); i
--)
5234 vec_dest
= vect_create_destination_var (scalar_dest
,
5236 vec_dsts
.quick_push (vec_dest
);
5241 vec_dest
= vect_create_destination_var (scalar_dest
,
5243 ? vectype_out
: cvt_type
);
5248 if (modifier
== WIDEN
)
5250 else if (modifier
== NARROW
)
5253 ninputs
= vect_pow2 (multi_step_cvt
);
5261 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5263 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5265 /* Arguments are ready, create the new vector stmt. */
5266 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5267 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5268 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5269 gimple_assign_set_lhs (new_stmt
, new_temp
);
5270 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5273 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5275 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
/* In case the vectorization factor (VF) is bigger than the number
   of elements that we can fit in a vectype (nunits), we have to
   generate more than one vector stmt - i.e - we need to "unroll"
   the vector stmt by a factor VF/nunits.  */
5284 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5286 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5288 if (code
== WIDEN_LSHIFT_EXPR
)
5290 int oprnds_size
= vec_oprnds0
.length ();
5291 vec_oprnds1
.create (oprnds_size
);
5292 for (i
= 0; i
< oprnds_size
; ++i
)
5293 vec_oprnds1
.quick_push (op1
);
5295 /* Arguments are ready. Create the new vector stmts. */
5296 for (i
= multi_step_cvt
; i
>= 0; i
--)
5298 tree this_dest
= vec_dsts
[i
];
5299 enum tree_code c1
= code1
, c2
= code2
;
5300 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5305 if (known_eq (nunits_out
, nunits_in
))
5306 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5307 &vec_oprnds1
, stmt_info
,
5311 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5312 &vec_oprnds1
, stmt_info
,
5317 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5322 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5323 new_temp
= make_ssa_name (vec_dest
);
5324 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5325 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5328 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5331 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5333 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
/* In case the vectorization factor (VF) is bigger than the number
   of elements that we can fit in a vectype (nunits), we have to
   generate more than one vector stmt - i.e - we need to "unroll"
   the vector stmt by a factor VF/nunits.  */
5342 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5344 /* Arguments are ready. Create the new vector stmts. */
5346 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5348 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5349 new_temp
= make_ssa_name (vec_dest
);
5351 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5352 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5353 vec_oprnds0
[i
] = new_temp
;
5356 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5358 stmt_info
, vec_dsts
, gsi
,
5363 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5365 vec_oprnds0
.release ();
5366 vec_oprnds1
.release ();
5367 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */
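/* For instance (not from this file), a sign-change like

     unsigned int u = (unsigned int) i;

   or a VIEW_CONVERT_EXPR between same-sized types generates no machine
   code in either the scalar or the vector form, so costing it would
   only skew the profitability estimate.  */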
5377 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5379 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5383 tree lhs
= gimple_assign_lhs (stmt
);
5384 tree_code code
= gimple_assign_rhs_code (stmt
);
5385 tree rhs
= gimple_assign_rhs1 (stmt
);
5387 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5390 if (CONVERT_EXPR_CODE_P (code
))
5391 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
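/* Illustrative scalar forms handled here (not from this file): plain
   SSA copies, PAREN_EXPR, and conversions that leave the value
   unchanged at the vector level, e.g.

     b_2 = a_1;
     c_3 = (unsigned int) a_1;

   each of which becomes at most a VIEW_CONVERT_EXPR of the operand's
   vector.  */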
5404 vectorizable_assignment (vec_info
*vinfo
,
5405 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5406 gimple
**vec_stmt
, slp_tree slp_node
,
5407 stmt_vector_for_cost
*cost_vec
)
5412 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5414 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5418 vec
<tree
> vec_oprnds
= vNULL
;
5420 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5421 enum tree_code code
;
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5427 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5431 /* Is vectorizable assignment? */
5432 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5436 scalar_dest
= gimple_assign_lhs (stmt
);
5437 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5440 if (STMT_VINFO_DATA_REF (stmt_info
))
5443 code
= gimple_assign_rhs_code (stmt
);
5444 if (!(gimple_assign_single_p (stmt
)
5445 || code
== PAREN_EXPR
5446 || CONVERT_EXPR_CODE_P (code
)))
5449 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5450 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
5458 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5460 gcc_assert (ncopies
>= 1);
5463 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5464 &dt
[0], &vectype_in
))
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5468 "use not simple.\n");
5472 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
/* We can handle NOP_EXPR conversions that do not change the number
   of elements or the vector size.  */
5476 if ((CONVERT_EXPR_CODE_P (code
)
5477 || code
== VIEW_CONVERT_EXPR
)
5479 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5480 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5481 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5484 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5485 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5489 "can't convert between boolean and non "
5490 "boolean vectors %T\n", TREE_TYPE (op
));
5495 /* We do not handle bit-precision changes. */
5496 if ((CONVERT_EXPR_CODE_P (code
)
5497 || code
== VIEW_CONVERT_EXPR
)
5498 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5499 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5500 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5501 /* But a conversion that does not change the bit-pattern is ok. */
5502 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5503 > TYPE_PRECISION (TREE_TYPE (op
)))
5504 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5506 if (dump_enabled_p ())
5507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5508 "type conversion to/from bit-precision "
5513 if (!vec_stmt
) /* transformation not required. */
5516 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5520 "incompatible vector types for invariants\n");
5523 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5524 DUMP_VECT_SCOPE ("vectorizable_assignment");
5525 if (!vect_nop_conversion_p (stmt_info
))
5526 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5536 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5539 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
/* Arguments are ready.  Create the new vector stmt.  */
5542 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5544 if (CONVERT_EXPR_CODE_P (code
)
5545 || code
== VIEW_CONVERT_EXPR
)
5546 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5547 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5548 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5549 gimple_assign_set_lhs (new_stmt
, new_temp
);
5550 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5552 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5554 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5557 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5559 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
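/* A minimal sketch of a query this answers (not from this file):
   whether e.g.

     x[i] = y[i] >> 3;

   can be vectorized for SCALAR_TYPE int, either with a
   vector-shifted-by-scalar optab or, failing that, with a
   vector-shifted-by-vector optab on the chosen vector mode.  */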
5568 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5571 machine_mode vec_mode
;
5576 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5580 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5582 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5584 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5586 || (optab_handler (optab
, TYPE_MODE (vectype
))
5587 == CODE_FOR_nothing
))
5591 vec_mode
= TYPE_MODE (vectype
);
5592 icode
= (int) optab_handler (optab
, vec_mode
);
5593 if (icode
== CODE_FOR_nothing
)
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
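/* Two illustrative shapes (not from this file):

     a[i] = b[i] << 3;      scalar shift amount - prefer the
                            vector/scalar optab
     a[i] = b[i] << c[i];   vector shift amount - needs the
                            vector/vector optab

   the analysis below chooses between these forms and checks the
   corresponding optab support.  */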
5608 vectorizable_shift (vec_info
*vinfo
,
5609 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5610 gimple
**vec_stmt
, slp_tree slp_node
,
5611 stmt_vector_for_cost
*cost_vec
)
5615 tree op0
, op1
= NULL
;
5616 tree vec_oprnd1
= NULL_TREE
;
5618 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5619 enum tree_code code
;
5620 machine_mode vec_mode
;
5624 machine_mode optab_op2_mode
;
5625 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5627 poly_uint64 nunits_in
;
5628 poly_uint64 nunits_out
;
5633 vec
<tree
> vec_oprnds0
= vNULL
;
5634 vec
<tree
> vec_oprnds1
= vNULL
;
5637 bool scalar_shift_arg
= true;
5638 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5639 bool incompatible_op1_vectype_p
= false;
5641 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5644 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5645 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5649 /* Is STMT a vectorizable binary/unary operation? */
5650 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5654 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5657 code
= gimple_assign_rhs_code (stmt
);
5659 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5660 || code
== RROTATE_EXPR
))
5663 scalar_dest
= gimple_assign_lhs (stmt
);
5664 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5665 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5669 "bit-precision shifts not supported.\n");
5674 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5675 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5679 "use not simple.\n");
/* If op0 is an external or constant def, infer the vector type
   from the scalar type.  */
5685 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5687 gcc_assert (vectype
);
5690 if (dump_enabled_p ())
5691 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5692 "no vectype for scalar type\n");
5696 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5697 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5698 if (maybe_ne (nunits_out
, nunits_in
))
5701 stmt_vec_info op1_def_stmt_info
;
5703 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5704 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5708 "use not simple.\n");
/* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
5718 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5720 gcc_assert (ncopies
>= 1);
/* Determine whether the shift amount is a vector, or scalar.  If the
   shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5725 if ((dt
[1] == vect_internal_def
5726 || dt
[1] == vect_induction_def
5727 || dt
[1] == vect_nested_cycle
)
5729 scalar_shift_arg
= false;
5730 else if (dt
[1] == vect_constant_def
5731 || dt
[1] == vect_external_def
5732 || dt
[1] == vect_internal_def
)
/* In SLP, need to check whether the shift count is the same,
   in loops if it is a constant or invariant, it is always
   a scalar shift.  */
5739 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5740 stmt_vec_info slpstmt_info
;
5742 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5744 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5745 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5746 scalar_shift_arg
= false;
/* For internal SLP defs we have to make sure we see scalar stmts
   for all vector elements.
   ???  For different vectors we could resort to a different
   scalar shift operand but code-generation below simply always
   takes the first.  */
5754 if (dt
[1] == vect_internal_def
5755 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5757 scalar_shift_arg
= false;
/* If the shift amount is computed by a pattern stmt we cannot
   use the scalar amount directly thus give up and use a vector
   shift.  */
5763 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5764 scalar_shift_arg
= false;
5768 if (dump_enabled_p ())
5769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5770 "operand mode requires invariant argument.\n");
5774 /* Vector shifted by vector. */
5775 bool was_scalar_shift_arg
= scalar_shift_arg
;
5776 if (!scalar_shift_arg
)
5778 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE
, vect_location
,
5781 "vector/vector shift/rotate found.\n");
5784 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5786 incompatible_op1_vectype_p
5787 = (op1_vectype
== NULL_TREE
5788 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5789 TYPE_VECTOR_SUBPARTS (vectype
))
5790 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5791 if (incompatible_op1_vectype_p
5793 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5794 || slp_op1
->refcnt
!= 1))
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5798 "unusable type for last operand in"
5799 " vector/vector shift/rotate.\n");
5803 /* See if the machine has a vector shifted by scalar insn and if not
5804 then see if it has a vector shifted by vector insn. */
5807 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5809 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE
, vect_location
,
5813 "vector/scalar shift/rotate found.\n");
5817 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5819 && (optab_handler (optab
, TYPE_MODE (vectype
))
5820 != CODE_FOR_nothing
))
5822 scalar_shift_arg
= false;
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE
, vect_location
,
5826 "vector/vector shift/rotate found.\n");
5829 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5833 /* Unlike the other binary operators, shifts/rotates have
5834 the rhs being int, instead of the same type as the lhs,
5835 so make sure the scalar is the right type if we are
5836 dealing with vectors of long long/long/short/char. */
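  /* For example, in C source such as (hypothetical names)

	 long long a[N];
	 int k;
	 a[i] = a[i] << k;

     the shift amount K is an int while the shifted values are long long,
     so K has to be converted to the vector element type before it can be
     broadcast into the vector shift operand.  */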
5837 incompatible_op1_vectype_p
5839 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5841 if (incompatible_op1_vectype_p
5842 && dt
[1] == vect_internal_def
)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5846 "unusable type for last operand in"
5847 " vector/vector shift/rotate.\n");
5854 /* Supportable by target? */
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5862 vec_mode
= TYPE_MODE (vectype
);
5863 icode
= (int) optab_handler (optab
, vec_mode
);
5864 if (icode
== CODE_FOR_nothing
)
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5868 "op not supported by target.\n");
5871 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5872 if (vect_emulated_vector_p (vectype
))
5875 if (!vec_stmt
) /* transformation not required. */
5878 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5879 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5880 && (!incompatible_op1_vectype_p
5881 || dt
[1] == vect_constant_def
)
5882 && !vect_maybe_update_slp_op_vectype
5884 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5888 "incompatible vector types for invariants\n");
5891 /* Now adjust the constant shift amount in place. */
5893 && incompatible_op1_vectype_p
5894 && dt
[1] == vect_constant_def
)
5896 for (unsigned i
= 0;
5897 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5899 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5900 = fold_convert (TREE_TYPE (vectype
),
5901 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5902 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5906 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5907 DUMP_VECT_SCOPE ("vectorizable_shift");
5908 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5909 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_NOTE
, vect_location
,
5917 "transform binary/unary operation.\n");
5919 if (incompatible_op1_vectype_p
&& !slp_node
)
5921 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5922 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5923 if (dt
[1] != vect_constant_def
)
5924 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5925 TREE_TYPE (vectype
), NULL
);
5929 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5931 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
	  /* Vector shl and shr insn patterns can be defined with scalar
	     operand 2 (shift operand).  In this case, use constant or loop
	     invariant op1 directly, without extending it to vector mode
	     first.  */
5937 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5938 if (!VECTOR_MODE_P (optab_op2_mode
))
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE
, vect_location
,
5942 "operand 1 using scalar mode.\n");
5944 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5945 vec_oprnds1
.quick_push (vec_oprnd1
);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
5952 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5953 vec_oprnds1
.quick_push (vec_oprnd1
);
5956 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5958 if (was_scalar_shift_arg
)
5960 /* If the argument was the same in all lanes create
5961 the correctly typed vector shift amount directly. */
5962 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5963 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5964 !loop_vinfo
? gsi
: NULL
);
5965 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5966 !loop_vinfo
? gsi
: NULL
);
5967 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5968 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5969 vec_oprnds1
.quick_push (vec_oprnd1
);
5971 else if (dt
[1] == vect_constant_def
)
5972 /* The constant shift amount has been adjusted in place. */
5975 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5978 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5979 (a special case for certain kind of vector shifts); otherwise,
5980 operand 1 should be of a vector type (the usual case). */
5981 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5983 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5985 /* Arguments are ready. Create the new vector stmt. */
5986 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5988 /* For internal defs where we need to use a scalar shift arg
5989 extract the first lane. */
5990 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5992 vop1
= vec_oprnds1
[0];
5993 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5995 = gimple_build_assign (new_temp
,
5996 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5998 TYPE_SIZE (TREE_TYPE (new_temp
)),
5999 bitsize_zero_node
));
6000 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6004 vop1
= vec_oprnds1
[i
];
6005 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6006 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6007 gimple_assign_set_lhs (new_stmt
, new_temp
);
6008 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6010 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6012 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6016 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6018 vec_oprnds0
.release ();
6019 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
6041 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6043 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6044 enum tree_code code
, orig_code
;
6045 machine_mode vec_mode
;
6049 bool target_support_p
;
6050 enum vect_def_type dt
[3]
6051 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6053 poly_uint64 nunits_in
;
6054 poly_uint64 nunits_out
;
6056 int ncopies
, vec_num
;
6058 vec
<tree
> vec_oprnds0
= vNULL
;
6059 vec
<tree
> vec_oprnds1
= vNULL
;
6060 vec
<tree
> vec_oprnds2
= vNULL
;
6061 tree vop0
, vop1
, vop2
;
6062 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6064 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6067 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6071 /* Is STMT a vectorizable binary/unary operation? */
6072 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6076 /* Loads and stores are handled in vectorizable_{load,store}. */
6077 if (STMT_VINFO_DATA_REF (stmt_info
))
6080 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6082 /* Shifts are handled in vectorizable_shift. */
6083 if (code
== LSHIFT_EXPR
6084 || code
== RSHIFT_EXPR
6085 || code
== LROTATE_EXPR
6086 || code
== RROTATE_EXPR
)
6089 /* Comparisons are handled in vectorizable_comparison. */
6090 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6093 /* Conditions are handled in vectorizable_condition. */
6094 if (code
== COND_EXPR
)
6097 /* For pointer addition and subtraction, we should use the normal
6098 plus and minus for the vector operation. */
6099 if (code
== POINTER_PLUS_EXPR
)
6101 if (code
== POINTER_DIFF_EXPR
)
6104 /* Support only unary or binary operations. */
6105 op_type
= TREE_CODE_LENGTH (code
);
6106 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6108 if (dump_enabled_p ())
6109 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6110 "num. args = %d (not unary/binary/ternary op).\n",
6115 scalar_dest
= gimple_assign_lhs (stmt
);
6116 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6118 /* Most operations cannot handle bit-precision types without extra
6120 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6122 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6123 /* Exception are bitwise binary operations. */
6124 && code
!= BIT_IOR_EXPR
6125 && code
!= BIT_XOR_EXPR
6126 && code
!= BIT_AND_EXPR
)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6130 "bit-precision arithmetic not supported.\n");
6135 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6136 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6140 "use not simple.\n");
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
6147 /* For boolean type we cannot determine vectype by
6148 invariant value (don't know whether it is a vector
6149 of booleans or vector of integers). We use output
6150 vectype because operations on boolean don't change
6152 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6158 "not supported operation on bool value.\n");
6161 vectype
= vectype_out
;
6164 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6168 gcc_assert (vectype
);
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6173 "no vectype for scalar type %T\n",
6179 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6180 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6181 if (maybe_ne (nunits_out
, nunits_in
))
6184 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6185 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6186 if (op_type
== binary_op
|| op_type
== ternary_op
)
6188 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6189 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6193 "use not simple.\n");
6197 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6200 if (op_type
== ternary_op
)
6202 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6203 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6207 "use not simple.\n");
6211 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
6221 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6225 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6229 gcc_assert (ncopies
>= 1);
6231 /* Reject attempts to combine mask types with nonmask types, e.g. if
6232 we have an AND between a (nonmask) boolean loaded from memory and
6233 a (mask) boolean result of a comparison.
6235 TODO: We could easily fix these cases up using pattern statements. */
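  /* A sketch of the kind of mix rejected here (hypothetical names):

	 _Bool *p;
	 ...
	 tem = p[i] & (a[i] < b[i]);

     p[i] is a (nonmask) boolean loaded from memory while a[i] < b[i]
     produces a (mask) boolean; combining those two different vector
     representations in a single BIT_AND_EXPR is what gets refused.  */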
6236 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6237 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6238 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6242 "mixed mask and nonmask vector types\n");
6246 /* Supportable by target? */
6248 vec_mode
= TYPE_MODE (vectype
);
6249 if (code
== MULT_HIGHPART_EXPR
)
6250 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6253 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6261 target_support_p
= (optab_handler (optab
, vec_mode
)
6262 != CODE_FOR_nothing
);
6264 if (!target_support_p
6266 && (cst
= uniform_integer_cst_p (op1
)))
6268 = targetm
.vectorize
.can_special_div_by_const (code
, vectype
,
6274 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6275 if (!target_support_p
)
6277 if (dump_enabled_p ())
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6279 "op not supported by target.\n");
6280 /* Check only during analysis. */
6281 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6282 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_NOTE
, vect_location
,
6286 "proceeding using word mode.\n");
6287 using_emulated_vectors_p
= true;
6290 if (using_emulated_vectors_p
6291 && !vect_can_vectorize_without_simd_p (code
))
6293 if (dump_enabled_p ())
6294 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6298 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6299 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6300 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6302 if (!vec_stmt
) /* transformation not required. */
6304 /* If this operation is part of a reduction, a fully-masked loop
6305 should only change the active lanes of the reduction chain,
6306 keeping the inactive lanes as-is. */
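  /* Roughly, for an addition in a reduction chain this amounts to emitting
     a conditional internal function of the form

	 res_v = .COND_ADD (loop_mask, a_v, b_v, a_v);

     where active lanes compute a_v + b_v and inactive lanes simply pass the
     reduction input a_v through unchanged (a sketch only; the actual call
     is built further below from cond_fn and reduc_idx).  */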
6308 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6311 if (cond_fn
== IFN_LAST
6312 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6313 OPTIMIZE_FOR_SPEED
))
6315 if (dump_enabled_p ())
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6317 "can't use a fully-masked loop because no"
6318 " conditional operation is available.\n");
6319 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6322 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6326 /* Put types on constant and invariant SLP children. */
6328 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6329 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6330 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "incompatible vector types for invariants\n");
6338 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6339 DUMP_VECT_SCOPE ("vectorizable_operation");
6340 vect_model_simple_cost (vinfo
, stmt_info
,
6341 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6342 if (using_emulated_vectors_p
)
6344 /* The above vect_model_simple_cost call handles constants
6345 in the prologue and (mis-)costs one of the stmts as
6346 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6347 for the actual lowering that will be applied. */
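  /* The word-mode lowering referred to above is essentially the classic
     SWAR bit trick; a stand-alone sketch (hypothetical helper, one uint32_t
     word holding four uint8_t lanes):

	 uint32_t
	 add_4x8 (uint32_t a, uint32_t b)
	 {
	   uint32_t msb = 0x80808080u;	// per-lane sign bits
	   return ((a & ~msb) + (b & ~msb)) ^ ((a ^ b) & msb);
	 }

     Clearing the top bit of every lane before the add keeps carries from
     leaking into the neighbouring lane, and the final XOR restores each
     lane's top bit.  Shifts have no comparably cheap word-mode form, which
     matches the earlier note in vectorizable_shift that vector lowering
     cannot optimize vector shifts using word arithmetic.  */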
6349 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6363 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6370 if (dump_enabled_p ())
6371 dump_printf_loc (MSG_NOTE
, vect_location
,
6372 "transform binary/unary operation.\n");
6374 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6376 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6377 vectors with unsigned elements, but the result is signed. So, we
6378 need to compute the MINUS_EXPR into vectype temporary and
6379 VIEW_CONVERT_EXPR it into the final vectype_out result. */
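  /* Concretely (hypothetical example): for

	 char *p[N], *q[N];
	 ptrdiff_t d[N];
	 d[i] = p[i] - q[i];	// POINTER_DIFF_EXPR

     the pointer operands are vectorized as vectors of unsigned integers,
     so the subtraction is done in that unsigned vectype and the result is
     then VIEW_CONVERT_EXPRed into the signed vectype_out.  */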
6380 tree vec_cvt_dest
= NULL_TREE
;
6381 if (orig_code
== POINTER_DIFF_EXPR
)
6383 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6384 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6388 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
6443 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6444 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6445 /* Arguments are ready. Create the new vector stmt. */
6446 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6448 gimple
*new_stmt
= NULL
;
6449 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6450 ? vec_oprnds1
[i
] : NULL_TREE
);
6451 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6452 if (masked_loop_p
&& reduc_idx
>= 0)
6454 /* Perform the operation on active elements only and take
6455 inactive elements from the reduction chain input. */
6457 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6458 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6460 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6462 new_temp
= make_ssa_name (vec_dest
, call
);
6463 gimple_call_set_lhs (call
, new_temp
);
6464 gimple_call_set_nothrow (call
, true);
6465 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6470 tree mask
= NULL_TREE
;
6471 /* When combining two masks check if either of them is elsewhere
6472 combined with a loop mask, if that's the case we can mark that the
6473 new combined mask doesn't need to be combined with a loop mask. */
6475 && code
== BIT_AND_EXPR
6476 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6478 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6481 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6484 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6488 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6491 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6494 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6499 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6500 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6501 gimple_assign_set_lhs (new_stmt
, new_temp
);
6502 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6503 if (using_emulated_vectors_p
)
6504 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
6506 /* Enter the combined value into the vector cond hash so we don't
6507 AND it with a loop mask again. */
6509 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6513 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6514 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6516 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6517 gimple_assign_set_lhs (new_stmt
, new_temp
);
6518 vect_finish_stmt_generation (vinfo
, stmt_info
,
6523 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6525 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6529 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6531 vec_oprnds0
.release ();
6532 vec_oprnds1
.release ();
6533 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
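/* As an illustration of the effect of the helper above (hypothetical
   example): for

       static double a[1024];
       for (i = 0; i < 1024; i++)
	 a[i] *= 2.0;

   on a target whose preferred vector alignment is 32 bytes, DECL_ALIGN of A
   can be raised to 32 bytes here, so the vectorized loop can use aligned
   accesses instead of peeling or unaligned loads/stores.  */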
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
6599 /* Function scan_operand_equal_p.
6601 Helper function for check_scan_store. Compare two references
6602 with .GOMP_SIMD_LANE bases. */
6605 scan_operand_equal_p (tree ref1
, tree ref2
)
6607 tree ref
[2] = { ref1
, ref2
};
6608 poly_int64 bitsize
[2], bitpos
[2];
6609 tree offset
[2], base
[2];
6610 for (int i
= 0; i
< 2; ++i
)
6613 int unsignedp
, reversep
, volatilep
= 0;
6614 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6615 &offset
[i
], &mode
, &unsignedp
,
6616 &reversep
, &volatilep
);
6617 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6619 if (TREE_CODE (base
[i
]) == MEM_REF
6620 && offset
[i
] == NULL_TREE
6621 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6623 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6624 if (is_gimple_assign (def_stmt
)
6625 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6626 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6627 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6629 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6631 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6632 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6637 if (!operand_equal_p (base
[0], base
[1], 0))
6639 if (maybe_ne (bitsize
[0], bitsize
[1]))
6641 if (offset
[0] != offset
[1])
6643 if (!offset
[0] || !offset
[1])
6645 if (!operand_equal_p (offset
[0], offset
[1], 0))
6648 for (int i
= 0; i
< 2; ++i
)
6650 step
[i
] = integer_one_node
;
6651 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6653 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6654 if (is_gimple_assign (def_stmt
)
6655 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6656 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6659 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6660 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6663 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6665 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6666 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6668 tree rhs1
= NULL_TREE
;
6669 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6671 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6672 if (gimple_assign_cast_p (def_stmt
))
6673 rhs1
= gimple_assign_rhs1 (def_stmt
);
6675 else if (CONVERT_EXPR_P (offset
[i
]))
6676 rhs1
= TREE_OPERAND (offset
[i
], 0);
6678 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6679 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6680 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6681 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6684 if (!operand_equal_p (offset
[0], offset
[1], 0)
6685 || !operand_equal_p (step
[0], step
[1], 0))
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
6715 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6716 unsigned HOST_WIDE_INT nunits
;
6717 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6719 int units_log2
= exact_log2 (nunits
);
6720 if (units_log2
<= 0)
6724 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6725 for (i
= 0; i
<= units_log2
; ++i
)
6727 unsigned HOST_WIDE_INT j
, k
;
6728 enum scan_store_kind kind
= scan_store_kind_perm
;
6729 vec_perm_builder
sel (nunits
, nunits
, 1);
6730 sel
.quick_grow (nunits
);
6731 if (i
== units_log2
)
6733 for (j
= 0; j
< nunits
; ++j
)
6734 sel
[j
] = nunits
- 1;
6738 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6740 for (k
= 0; j
< nunits
; ++j
, ++k
)
6741 sel
[j
] = nunits
+ k
;
6743 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6744 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
6746 if (i
== units_log2
)
6749 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6751 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6753 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6754 /* Whole vector shifts shift in zeros, so if init is all zero
6755 constant, there is no need to do anything further. */
6756 if ((TREE_CODE (init
) != INTEGER_CST
6757 && TREE_CODE (init
) != REAL_CST
)
6758 || !initializer_zerop (init
))
6760 tree masktype
= truth_type_for (vectype
);
6761 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6763 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6766 kind
= whole_vector_shift_kind
;
6768 if (use_whole_vector
)
6770 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6771 use_whole_vector
->safe_grow_cleared (i
, true);
6772 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6773 use_whole_vector
->safe_push (kind
);
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
6790 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6791 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6794 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6797 || memory_access_type
!= VMAT_CONTIGUOUS
6798 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6799 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6800 || loop_vinfo
== NULL
6801 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6802 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6803 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6804 || !integer_zerop (DR_INIT (dr_info
->dr
))
6805 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6806 || !alias_sets_conflict_p (get_alias_set (vectype
),
6807 get_alias_set (TREE_TYPE (ref_type
))))
6809 if (dump_enabled_p ())
6810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6811 "unsupported OpenMP scan store.\n");
6815 /* We need to pattern match code built by OpenMP lowering and simplified
6816 by following optimizations into something we can handle.
6817 #pragma omp simd reduction(inscan,+:r)
6821 #pragma omp scan inclusive (r)
6824 shall have body with:
6825 // Initialization for input phase, store the reduction initializer:
6826 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6827 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6829 // Actual input phase:
6831 r.0_5 = D.2042[_20];
6834 // Initialization for scan phase:
6835 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6841 // Actual scan phase:
6843 r.1_8 = D.2042[_20];
6845 The "omp simd array" variable D.2042 holds the privatized copy used
6846 inside of the loop and D.2043 is another one that holds copies of
6847 the current original list item. The separate GOMP_SIMD_LANE ifn
6848 kinds are there in order to allow optimizing the initializer store
6849 and combiner sequence, e.g. if it is originally some C++ish user
6850 defined reduction, but allow the vectorizer to pattern recognize it
6851 and turn into the appropriate vectorized scan.
6853 For exclusive scan, this is slightly different:
6854 #pragma omp simd reduction(inscan,+:r)
6858 #pragma omp scan exclusive (r)
6861 shall have body with:
6862 // Initialization for input phase, store the reduction initializer:
6863 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6864 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6866 // Actual input phase:
6868 r.0_5 = D.2042[_20];
6871 // Initialization for scan phase:
6872 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6878 // Actual scan phase:
6880 r.1_8 = D.2044[_20];
6883 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6885 /* Match the D.2042[_21] = 0; store above. Just require that
6886 it is a constant or external definition store. */
6887 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6890 if (dump_enabled_p ())
6891 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6892 "unsupported OpenMP scan initializer store.\n");
6896 if (! loop_vinfo
->scan_map
)
6897 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6898 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6899 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6902 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6904 /* These stores can be vectorized normally. */
6908 if (rhs_dt
!= vect_internal_def
)
6911 if (dump_enabled_p ())
6912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6913 "unsupported OpenMP scan combiner pattern.\n");
6917 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6918 tree rhs
= gimple_assign_rhs1 (stmt
);
6919 if (TREE_CODE (rhs
) != SSA_NAME
)
6922 gimple
*other_store_stmt
= NULL
;
6923 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6924 bool inscan_var_store
6925 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6929 if (!inscan_var_store
)
6931 use_operand_p use_p
;
6932 imm_use_iterator iter
;
6933 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6935 gimple
*use_stmt
= USE_STMT (use_p
);
6936 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6938 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6939 || !is_gimple_assign (use_stmt
)
6940 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6942 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6944 other_store_stmt
= use_stmt
;
6946 if (other_store_stmt
== NULL
)
6948 rhs
= gimple_assign_lhs (other_store_stmt
);
6949 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6953 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6955 use_operand_p use_p
;
6956 imm_use_iterator iter
;
6957 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6959 gimple
*use_stmt
= USE_STMT (use_p
);
6960 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6962 if (other_store_stmt
)
6964 other_store_stmt
= use_stmt
;
6970 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6971 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6972 || !is_gimple_assign (def_stmt
)
6973 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6976 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6977 /* For pointer addition, we should use the normal plus for the vector
6981 case POINTER_PLUS_EXPR
:
6984 case MULT_HIGHPART_EXPR
:
6989 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6992 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6993 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6994 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6997 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6998 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6999 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7000 || !gimple_assign_load_p (load1_stmt
)
7001 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7002 || !gimple_assign_load_p (load2_stmt
))
7005 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7006 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7007 if (load1_stmt_info
== NULL
7008 || load2_stmt_info
== NULL
7009 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7010 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7011 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7012 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7017 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7018 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7019 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7021 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7023 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7027 use_operand_p use_p
;
7028 imm_use_iterator iter
;
7029 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7031 gimple
*use_stmt
= USE_STMT (use_p
);
7032 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7034 if (other_store_stmt
)
7036 other_store_stmt
= use_stmt
;
7040 if (other_store_stmt
== NULL
)
7042 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7043 || !gimple_store_p (other_store_stmt
))
7046 stmt_vec_info other_store_stmt_info
7047 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7048 if (other_store_stmt_info
== NULL
7049 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7050 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7053 gimple
*stmt1
= stmt
;
7054 gimple
*stmt2
= other_store_stmt
;
7055 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7056 std::swap (stmt1
, stmt2
);
7057 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7058 gimple_assign_rhs1 (load2_stmt
)))
7060 std::swap (rhs1
, rhs2
);
7061 std::swap (load1_stmt
, load2_stmt
);
7062 std::swap (load1_stmt_info
, load2_stmt_info
);
7064 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7065 gimple_assign_rhs1 (load1_stmt
)))
7068 tree var3
= NULL_TREE
;
7069 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7070 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7071 gimple_assign_rhs1 (load2_stmt
)))
7073 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7075 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7076 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7077 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7079 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7080 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7081 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7082 || lookup_attribute ("omp simd inscan exclusive",
7083 DECL_ATTRIBUTES (var3
)))
7087 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7088 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7089 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7092 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7093 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7094 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7095 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7096 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7097 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7100 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7101 std::swap (var1
, var2
);
7103 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7105 if (!lookup_attribute ("omp simd inscan exclusive",
7106 DECL_ATTRIBUTES (var1
)))
7111 if (loop_vinfo
->scan_map
== NULL
)
7113 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7117 /* The IL is as expected, now check if we can actually vectorize it.
7124 should be vectorized as (where _40 is the vectorized rhs
7125 from the D.2042[_21] = 0; store):
7126 _30 = MEM <vector(8) int> [(int *)&D.2043];
7127 _31 = MEM <vector(8) int> [(int *)&D.2042];
7128 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7130 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7131 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7133 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7134 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7135 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7137 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7138 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7140 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7141 MEM <vector(8) int> [(int *)&D.2043] = _39;
7142 MEM <vector(8) int> [(int *)&D.2042] = _38;
7149 should be vectorized as (where _40 is the vectorized rhs
7150 from the D.2042[_21] = 0; store):
7151 _30 = MEM <vector(8) int> [(int *)&D.2043];
7152 _31 = MEM <vector(8) int> [(int *)&D.2042];
7153 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7154 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7156 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7157 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7158 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7160 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7161 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7162 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7164 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7165 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7168 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7169 MEM <vector(8) int> [(int *)&D.2044] = _39;
7170 MEM <vector(8) int> [(int *)&D.2042] = _51; */
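  /* The log2(nunits) permute-and-combine steps above are the usual doubling
     (Hillis-Steele) prefix scan.  A scalar sketch of the same idea on a
     plain array (hypothetical helper, N a power of two):

	 void
	 inclusive_scan (int *v, int n)
	 {
	   for (int shift = 1; shift < n; shift <<= 1)
	     for (int j = n - 1; j >= shift; j--)
	       v[j] += v[j - shift];
	 }

     Each iteration of the outer loop corresponds to one VEC_PERM_EXPR (or
     whole-vector shift) followed by the reduction operation in the
     vectorized sequence shown above.  */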
7171 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7172 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7173 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7176 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7177 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
7194 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7195 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7196 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7197 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7199 if (dump_enabled_p ())
7200 dump_printf_loc (MSG_NOTE
, vect_location
,
7201 "transform scan store. ncopies = %d\n", ncopies
);
7203 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7204 tree rhs
= gimple_assign_rhs1 (stmt
);
7205 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7207 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7208 bool inscan_var_store
7209 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7211 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7213 use_operand_p use_p
;
7214 imm_use_iterator iter
;
7215 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7217 gimple
*use_stmt
= USE_STMT (use_p
);
7218 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7220 rhs
= gimple_assign_lhs (use_stmt
);
7225 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7226 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7227 if (code
== POINTER_PLUS_EXPR
)
7229 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7230 && commutative_tree_code (code
));
7231 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7232 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7233 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7234 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7235 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7236 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7237 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7238 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7239 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7240 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7241 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7243 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7245 std::swap (rhs1
, rhs2
);
7246 std::swap (var1
, var2
);
7247 std::swap (load1_dr_info
, load2_dr_info
);
7250 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7253 unsigned HOST_WIDE_INT nunits
;
7254 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7256 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7257 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7258 gcc_assert (units_log2
> 0);
7259 auto_vec
<tree
, 16> perms
;
7260 perms
.quick_grow (units_log2
+ 1);
7261 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7262 for (int i
= 0; i
<= units_log2
; ++i
)
7264 unsigned HOST_WIDE_INT j
, k
;
7265 vec_perm_builder
sel (nunits
, nunits
, 1);
7266 sel
.quick_grow (nunits
);
7267 if (i
== units_log2
)
7268 for (j
= 0; j
< nunits
; ++j
)
7269 sel
[j
] = nunits
- 1;
7272 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7274 for (k
= 0; j
< nunits
; ++j
, ++k
)
7275 sel
[j
] = nunits
+ k
;
7277 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7278 if (!use_whole_vector
.is_empty ()
7279 && use_whole_vector
[i
] != scan_store_kind_perm
)
7281 if (zero_vec
== NULL_TREE
)
7282 zero_vec
= build_zero_cst (vectype
);
7283 if (masktype
== NULL_TREE
7284 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7285 masktype
= truth_type_for (vectype
);
7286 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7289 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7292 tree vec_oprnd1
= NULL_TREE
;
7293 tree vec_oprnd2
= NULL_TREE
;
7294 tree vec_oprnd3
= NULL_TREE
;
7295 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7296 tree dataref_offset
= build_int_cst (ref_type
, 0);
7297 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7298 vectype
, VMAT_CONTIGUOUS
);
7299 tree ldataref_ptr
= NULL_TREE
;
7300 tree orig
= NULL_TREE
;
7301 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7302 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7303 auto_vec
<tree
> vec_oprnds1
;
7304 auto_vec
<tree
> vec_oprnds2
;
7305 auto_vec
<tree
> vec_oprnds3
;
7306 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7307 *init
, &vec_oprnds1
,
7308 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7309 rhs2
, &vec_oprnds3
);
7310 for (int j
= 0; j
< ncopies
; j
++)
7312 vec_oprnd1
= vec_oprnds1
[j
];
7313 if (ldataref_ptr
== NULL
)
7314 vec_oprnd2
= vec_oprnds2
[j
];
7315 vec_oprnd3
= vec_oprnds3
[j
];
7318 else if (!inscan_var_store
)
7319 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7323 vec_oprnd2
= make_ssa_name (vectype
);
7324 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7325 unshare_expr (ldataref_ptr
),
7327 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7328 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7329 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7330 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7331 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7334 tree v
= vec_oprnd2
;
7335 for (int i
= 0; i
< units_log2
; ++i
)
7337 tree new_temp
= make_ssa_name (vectype
);
7338 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7340 && (use_whole_vector
[i
]
7341 != scan_store_kind_perm
))
7342 ? zero_vec
: vec_oprnd1
, v
,
7344 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7345 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7346 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7348 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7350 /* Whole vector shift shifted in zero bits, but if *init
7351 is not initializer_zerop, we need to replace those elements
7352 with elements from vec_oprnd1. */
7353 tree_vector_builder
vb (masktype
, nunits
, 1);
7354 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7355 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7356 ? boolean_false_node
: boolean_true_node
);
7358 tree new_temp2
= make_ssa_name (vectype
);
7359 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7360 new_temp
, vec_oprnd1
);
7361 vect_finish_stmt_generation (vinfo
, stmt_info
,
7363 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7364 new_temp
= new_temp2
;
7367 /* For exclusive scan, perform the perms[i] permutation once
7370 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7378 tree new_temp2
= make_ssa_name (vectype
);
7379 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7380 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7381 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7386 tree new_temp
= make_ssa_name (vectype
);
7387 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7388 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7389 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7391 tree last_perm_arg
= new_temp
;
7392 /* For exclusive scan, new_temp computed above is the exclusive scan
7393 prefix sum. Turn it into inclusive prefix sum for the broadcast
7394 of the last element into orig. */
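	  /* E.g. with lanes {a, b, c, d} and + as the operation, the
	     exclusive prefix sum computed so far is {0, a, a+b, a+b+c};
	     adding the original input vector once more gives the inclusive
	     {a, a+b, a+b+c, a+b+c+d}, whose last lane is the value that is
	     broadcast into ORIG below.  */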
7395 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7397 last_perm_arg
= make_ssa_name (vectype
);
7398 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7399 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7400 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7403 orig
= make_ssa_name (vectype
);
7404 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7405 last_perm_arg
, perms
[units_log2
]);
7406 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7407 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7409 if (!inscan_var_store
)
7411 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7412 unshare_expr (dataref_ptr
),
7414 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7415 g
= gimple_build_assign (data_ref
, new_temp
);
7416 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7417 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7421 if (inscan_var_store
)
7422 for (int j
= 0; j
< ncopies
; j
++)
7425 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7427 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7428 unshare_expr (dataref_ptr
),
7430 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7431 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7432 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7433 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
7455 tree vec_oprnd
= NULL_TREE
;
7457 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7458 class loop
*loop
= NULL
;
7459 machine_mode vec_mode
;
7461 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7462 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7463 tree dataref_ptr
= NULL_TREE
;
7464 tree dataref_offset
= NULL_TREE
;
7465 gimple
*ptr_incr
= NULL
;
7468 stmt_vec_info first_stmt_info
;
7470 unsigned int group_size
, i
;
7471 vec
<tree
> oprnds
= vNULL
;
7472 vec
<tree
> result_chain
= vNULL
;
7473 vec
<tree
> vec_oprnds
= vNULL
;
7474 bool slp
= (slp_node
!= NULL
);
7475 unsigned int vec_num
;
7476 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7478 gather_scatter_info gs_info
;
7480 vec_load_store_type vls_type
;
7483 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7486 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7490 /* Is vectorizable store? */
7492 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7493 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7495 tree scalar_dest
= gimple_assign_lhs (assign
);
7496 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7497 && is_pattern_stmt_p (stmt_info
))
7498 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7499 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7500 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7501 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7502 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7503 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7504 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7505 && TREE_CODE (scalar_dest
) != MEM_REF
)
7510 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7511 if (!call
|| !gimple_call_internal_p (call
))
7514 internal_fn ifn
= gimple_call_internal_fn (call
);
7515 if (!internal_store_fn_p (ifn
))
7518 if (slp_node
!= NULL
)
7520 if (dump_enabled_p ())
7521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7522 "SLP of masked stores not supported.\n");
7526 int mask_index
= internal_fn_mask_index (ifn
);
7528 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7529 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7533 op
= vect_get_store_rhs (stmt_info
);
7535 /* Cannot have hybrid store SLP -- that would mean storing to the
7536 same location twice. */
7537 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7539 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7540 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7544 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7545 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
7556 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7558 gcc_assert (ncopies
>= 1);
7560 /* FORNOW. This restriction should be relaxed. */
7561 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7563 if (dump_enabled_p ())
7564 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7565 "multiple types in nested loop.\n");
7569 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7570 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7573 elem_type
= TREE_TYPE (vectype
);
7574 vec_mode
= TYPE_MODE (vectype
);
7576 if (!STMT_VINFO_DATA_REF (stmt_info
))
7579 vect_memory_access_type memory_access_type
;
7580 enum dr_alignment_support alignment_support_scheme
;
7583 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7584 ncopies
, &memory_access_type
, &poffset
,
7585 &alignment_support_scheme
, &misalignment
, &gs_info
))
7590 if (memory_access_type
== VMAT_CONTIGUOUS
)
7592 if (!VECTOR_MODE_P (vec_mode
)
7593 || !can_vec_mask_load_store_p (vec_mode
,
7594 TYPE_MODE (mask_vectype
), false))
7597 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7598 && (memory_access_type
!= VMAT_GATHER_SCATTER
7599 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7601 if (dump_enabled_p ())
7602 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7603 "unsupported access type for masked store.\n");
7609 /* FORNOW. In some cases can vectorize even if data-type not supported
7610 (e.g. - array initialization with 0). */
7611 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7615 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7616 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7617 && memory_access_type
!= VMAT_GATHER_SCATTER
7618 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7621 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7622 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7623 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7627 first_stmt_info
= stmt_info
;
7628 first_dr_info
= dr_info
;
7629 group_size
= vec_num
= 1;
7632 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7634 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7635 memory_access_type
))
7639 if (!vec_stmt
) /* transformation not required. */
7641 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7644 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7645 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
7646 vls_type
, group_size
,
7647 memory_access_type
, &gs_info
,
7651 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7654 if (dump_enabled_p ())
7655 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7656 "incompatible vector types for invariants\n");
7660 if (dump_enabled_p ()
7661 && memory_access_type
!= VMAT_ELEMENTWISE
7662 && memory_access_type
!= VMAT_GATHER_SCATTER
7663 && alignment_support_scheme
!= dr_aligned
)
7664 dump_printf_loc (MSG_NOTE
, vect_location
,
7665 "Vectorizing an unaligned access.\n");
7667 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7668 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7669 memory_access_type
, alignment_support_scheme
,
7670 misalignment
, vls_type
, slp_node
, cost_vec
);
7673 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7677 ensure_base_align (dr_info
);
7679 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7681 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7682 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7683 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7684 tree ptr
, var
, scale
, vec_mask
;
7685 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7686 tree mask_halfvectype
= mask_vectype
;
7687 edge pe
= loop_preheader_edge (loop
);
7690 enum { NARROW
, NONE
, WIDEN
} modifier
;
7691 poly_uint64 scatter_off_nunits
7692 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7694 if (known_eq (nunits
, scatter_off_nunits
))
7696 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7700 /* Currently gathers and scatters are only supported for
7701 fixed-length vectors. */
7702 unsigned int count
= scatter_off_nunits
.to_constant ();
7703 vec_perm_builder
sel (count
, count
, 1);
7704 for (i
= 0; i
< (unsigned int) count
; ++i
)
7705 sel
.quick_push (i
| (count
/ 2));
7707 vec_perm_indices
indices (sel
, 1, count
);
7708 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7710 gcc_assert (perm_mask
!= NULL_TREE
);
7712 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7716 /* Currently gathers and scatters are only supported for
7717 fixed-length vectors. */
7718 unsigned int count
= nunits
.to_constant ();
7719 vec_perm_builder
sel (count
, count
, 1);
7720 for (i
= 0; i
< (unsigned int) count
; ++i
)
7721 sel
.quick_push (i
| (count
/ 2));
7723 vec_perm_indices
indices (sel
, 2, count
);
7724 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7725 gcc_assert (perm_mask
!= NULL_TREE
);
7729 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7734 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7735 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7736 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7737 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7738 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7739 scaletype
= TREE_VALUE (arglist
);
7741 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7742 && TREE_CODE (rettype
) == VOID_TYPE
);
7744 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7745 if (!is_gimple_min_invariant (ptr
))
7747 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7748 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7749 gcc_assert (!new_bb
);
7752 if (mask
== NULL_TREE
)
7754 mask_arg
= build_int_cst (masktype
, -1);
7755 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7756 mask_arg
, masktype
, NULL
);
7759 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7761 auto_vec
<tree
> vec_oprnds0
;
7762 auto_vec
<tree
> vec_oprnds1
;
7763 auto_vec
<tree
> vec_masks
;
7766 tree mask_vectype
= truth_type_for (vectype
);
7767 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7769 ? ncopies
/ 2 : ncopies
,
7770 mask
, &vec_masks
, mask_vectype
);
7772 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7774 ? ncopies
/ 2 : ncopies
,
7775 gs_info
.offset
, &vec_oprnds0
);
7776 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7778 ? ncopies
/ 2 : ncopies
,
7780 for (j
= 0; j
< ncopies
; ++j
)
7782 if (modifier
== WIDEN
)
7785 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7786 perm_mask
, stmt_info
, gsi
);
7788 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7789 src
= vec_oprnd1
= vec_oprnds1
[j
];
7791 mask_op
= vec_mask
= vec_masks
[j
];
7793 else if (modifier
== NARROW
)
7796 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7797 perm_mask
, stmt_info
, gsi
);
7799 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7800 op
= vec_oprnd0
= vec_oprnds0
[j
];
7802 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7806 op
= vec_oprnd0
= vec_oprnds0
[j
];
7807 src
= vec_oprnd1
= vec_oprnds1
[j
];
7809 mask_op
= vec_mask
= vec_masks
[j
];
7812 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7815 TYPE_VECTOR_SUBPARTS (srctype
)));
7816 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7817 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7819 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7820 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7824 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7826 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7827 TYPE_VECTOR_SUBPARTS (idxtype
)));
7828 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7829 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7831 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7832 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7840 if (modifier
== NARROW
)
7842 var
= vect_get_new_ssa_name (mask_halfvectype
,
7845 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7846 : VEC_UNPACK_LO_EXPR
,
7848 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7851 tree optype
= TREE_TYPE (mask_arg
);
7852 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7855 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7856 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7857 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7859 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7860 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7862 if (!useless_type_conversion_p (masktype
, utype
))
7864 gcc_assert (TYPE_PRECISION (utype
)
7865 <= TYPE_PRECISION (masktype
));
7866 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7867 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7868 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7874 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7875 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7877 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7879 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7882 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7883 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7886 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7891 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
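      /* As a concrete sketch (assuming a group of two interleaved stores
         a[2*i] and a[2*i+1]): when the first store of the group is reached,
         DR_GROUP_STORE_COUNT is merely incremented and no code is emitted;
         only when the last store of the group is reached are the vector
         defs of all group members collected, permuted and stored:

           S1: a[2*i]   = x;    <- counted, nothing emitted yet
           S2: a[2*i+1] = y;    <- whole group vectorized here  */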
7895 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7896 < DR_GROUP_SIZE (first_stmt_info
)
7905 grouped_store
= false;
7906 /* VEC_NUM is the number of vect stmts to be created for this
7908 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7909 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7910 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7911 == first_stmt_info
);
7912 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7913 op
= vect_get_store_rhs (first_stmt_info
);
7916 /* VEC_NUM is the number of vect stmts to be created for this
7918 vec_num
= group_size
;
7920 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7923 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7925 if (dump_enabled_p ())
7926 dump_printf_loc (MSG_NOTE
, vect_location
,
7927 "transform store. ncopies = %d\n", ncopies
);
7929 if (memory_access_type
== VMAT_ELEMENTWISE
7930 || memory_access_type
== VMAT_STRIDED_SLP
)
7932 gimple_stmt_iterator incr_gsi
;
7938 tree stride_base
, stride_step
, alias_off
;
7942 /* Checked by get_load_store_type. */
7943 unsigned int const_nunits
= nunits
.to_constant ();
7945 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7946 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7948 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7950 = fold_build_pointer_plus
7951 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7952 size_binop (PLUS_EXPR
,
7953 convert_to_ptrofftype (dr_offset
),
7954 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7955 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
       */
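      /* A concrete sketch of the above, assuming a V4SI vectype
         (const_nunits == 4) and a scalar loop storing with stride 3:

           for (i = 0; i < n; i += 3)
             array[i] = x[i];

         becomes, per vector statement,

           vectemp = <x0, x1, x2, x3>;
           array[j + 0*3] = BIT_FIELD_REF <vectemp, 32, 0>;
           array[j + 1*3] = BIT_FIELD_REF <vectemp, 32, 32>;
           array[j + 2*3] = BIT_FIELD_REF <vectemp, 32, 64>;
           array[j + 3*3] = BIT_FIELD_REF <vectemp, 32, 96>;

         with the running offset advanced by stride_step after each element
         store, roughly as the loop below emits it.  */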
7975 unsigned nstores
= const_nunits
;
7977 tree ltype
= elem_type
;
7978 tree lvectype
= vectype
;
7981 if (group_size
< const_nunits
7982 && const_nunits
% group_size
== 0)
7984 nstores
= const_nunits
/ group_size
;
7986 ltype
= build_vector_type (elem_type
, group_size
);
7989 /* First check if vec_extract optab doesn't support extraction
7990 of vector elts directly. */
7991 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7993 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7994 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7995 group_size
).exists (&vmode
)
7996 || (convert_optab_handler (vec_extract_optab
,
7997 TYPE_MODE (vectype
), vmode
)
7998 == CODE_FOR_nothing
))
              /* Try to avoid emitting an extract of vector elements
                 by performing the extracts using an integer type of the
                 same size, extracting from a vector of those and then
                 re-interpreting it as the original vector type if
                 supported.  */
              unsigned lsize
                = group_size * GET_MODE_BITSIZE (elmode);
8007 unsigned int lnunits
= const_nunits
/ group_size
;
8008 /* If we can't construct such a vector fall back to
8009 element extracts from the original vector type and
8010 element size stores. */
8011 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8012 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8013 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8014 lnunits
).exists (&vmode
)
8015 && (convert_optab_handler (vec_extract_optab
,
8017 != CODE_FOR_nothing
))
8021 ltype
= build_nonstandard_integer_type (lsize
, 1);
8022 lvectype
= build_vector_type (ltype
, nstores
);
              /* Else fall back to vector extraction anyway.
                 Fewer stores are more important than avoiding spilling
                 of the vector we extract from.  Compared to the
                 construction case in vectorizable_load no store-forwarding
                 issue exists here for reasonable archs.  */
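              /* A hypothetical instance of the trick above: for a group of
                 two float elements (group_size == 2, 32-bit elements) the
                 extraction can be done as one 64-bit integer per group, i.e.
                 a V4SF source is punned to a two-element integer vector and
                 each store writes one DI element:

                   vectemp = VIEW_CONVERT_EXPR<vector(2) long long>(vx);
                   MEM <long long> [ptr]     = BIT_FIELD_REF <vectemp, 64, 0>;
                   MEM <long long> [ptr + 8] = BIT_FIELD_REF <vectemp, 64, 64>;

                 halving the number of stores compared to storing the four
                 floats one by one.  */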
8031 else if (group_size
>= const_nunits
8032 && group_size
% const_nunits
== 0)
8035 lnel
= const_nunits
;
8039 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8040 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8043 ivstep
= stride_step
;
8044 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8045 build_int_cst (TREE_TYPE (ivstep
), vf
));
8047 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8049 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8050 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8051 create_iv (stride_base
, ivstep
, NULL
,
8052 loop
, &incr_gsi
, insert_after
,
8054 incr
= gsi_stmt (incr_gsi
);
8056 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8058 alias_off
= build_int_cst (ref_type
, 0);
8059 stmt_vec_info next_stmt_info
= first_stmt_info
;
8060 for (g
= 0; g
< group_size
; g
++)
8062 running_off
= offvar
;
8065 tree size
= TYPE_SIZE_UNIT (ltype
);
8066 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8068 tree newoff
= copy_ssa_name (running_off
, NULL
);
8069 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8071 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8072 running_off
= newoff
;
8075 op
= vect_get_store_rhs (next_stmt_info
);
8076 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8078 unsigned int group_el
= 0;
8079 unsigned HOST_WIDE_INT
8080 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8081 for (j
= 0; j
< ncopies
; j
++)
8083 vec_oprnd
= vec_oprnds
[j
];
8084 /* Pun the vector to extract from if necessary. */
8085 if (lvectype
!= vectype
)
8087 tree tem
= make_ssa_name (lvectype
);
8089 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8090 lvectype
, vec_oprnd
));
8091 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8094 for (i
= 0; i
< nstores
; i
++)
8096 tree newref
, newoff
;
8097 gimple
*incr
, *assign
;
8098 tree size
= TYPE_SIZE (ltype
);
8099 /* Extract the i'th component. */
8100 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8101 bitsize_int (i
), size
);
8102 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8105 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8109 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8111 newref
= build2 (MEM_REF
, ltype
,
8112 running_off
, this_off
);
8113 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8115 /* And store it to *running_off. */
8116 assign
= gimple_build_assign (newref
, elem
);
8117 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8121 || group_el
== group_size
)
8123 newoff
= copy_ssa_name (running_off
, NULL
);
8124 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8125 running_off
, stride_step
);
8126 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8128 running_off
= newoff
;
8131 if (g
== group_size
- 1
8134 if (j
== 0 && i
== 0)
8136 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8140 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8141 vec_oprnds
.release ();
8149 auto_vec
<tree
> dr_chain (group_size
);
8150 oprnds
.create (group_size
);
8152 gcc_assert (alignment_support_scheme
);
8153 vec_loop_masks
*loop_masks
8154 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8155 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8157 vec_loop_lens
*loop_lens
8158 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8159 ? &LOOP_VINFO_LENS (loop_vinfo
)
8162 /* Shouldn't go with length-based approach if fully masked. */
8163 gcc_assert (!loop_lens
|| !loop_masks
);
8165 /* Targets with store-lane instructions must not require explicit
8166 realignment. vect_supportable_dr_alignment always returns either
8167 dr_aligned or dr_unaligned_supported for masked operations. */
8168 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8171 || alignment_support_scheme
== dr_aligned
8172 || alignment_support_scheme
== dr_unaligned_supported
);
8174 tree offset
= NULL_TREE
;
8175 if (!known_eq (poffset
, 0))
8176 offset
= size_int (poffset
);
8179 tree vec_offset
= NULL_TREE
;
8180 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8182 aggr_type
= NULL_TREE
;
8185 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8187 aggr_type
= elem_type
;
8188 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8189 &bump
, &vec_offset
);
8193 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8194 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8196 aggr_type
= vectype
;
8197 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8198 memory_access_type
);
8202 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
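  /* A small concrete instance of the interleaving scheme above, assuming
     two V4SI group members vx0 = {a0,a1,a2,a3} and vx1 = {b0,b1,b2,b3}
     (group_size == 2).  vect_permute_store_chain emits

        vlo = VEC_PERM_EXPR <vx0, vx1, { 0, 4, 1, 5 }>;   i.e. {a0,b0,a1,b1}
        vhi = VEC_PERM_EXPR <vx0, vx1, { 2, 6, 3, 7 }>;   i.e. {a2,b2,a3,b3}

     and the two results are stored to consecutive vector-sized slots, which
     reproduces the scalar interleaved layout a0 b0 a1 b1 a2 b2 a3 b3.  */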
8242 auto_vec
<tree
> vec_masks
;
8243 tree vec_mask
= NULL
;
8244 auto_vec
<tree
> vec_offsets
;
8245 auto_vec
<vec
<tree
> > gvec_oprnds
;
8246 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8247 for (j
= 0; j
< ncopies
; j
++)
8254 /* Get vectorized arguments for SLP_NODE. */
8255 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8257 vec_oprnd
= vec_oprnds
[0];
8261 /* For interleaved stores we collect vectorized defs for all the
8262 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8263 used as an input to vect_permute_store_chain().
8265 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8266 and OPRNDS are of size 1. */
8267 stmt_vec_info next_stmt_info
= first_stmt_info
;
8268 for (i
= 0; i
< group_size
; i
++)
8270 /* Since gaps are not supported for interleaved stores,
8271 DR_GROUP_SIZE is the exact number of stmts in the chain.
8272 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8273 that there is no interleaving, DR_GROUP_SIZE is 1,
8274 and only one iteration of the loop will be executed. */
8275 op
= vect_get_store_rhs (next_stmt_info
);
8276 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8277 ncopies
, op
, &gvec_oprnds
[i
]);
8278 vec_oprnd
= gvec_oprnds
[i
][0];
8279 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8280 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8281 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8285 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8286 mask
, &vec_masks
, mask_vectype
);
8287 vec_mask
= vec_masks
[0];
          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
8294 bool simd_lane_access_p
8295 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8296 if (simd_lane_access_p
8298 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8299 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8300 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8301 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8302 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8303 get_alias_set (TREE_TYPE (ref_type
))))
8305 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8306 dataref_offset
= build_int_cst (ref_type
, 0);
8308 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8310 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8311 slp_node
, &gs_info
, &dataref_ptr
,
8313 vec_offset
= vec_offsets
[0];
8317 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8318 simd_lane_access_p
? loop
: NULL
,
8319 offset
, &dummy
, gsi
, &ptr_incr
,
8320 simd_lane_access_p
, bump
);
8324 /* For interleaved stores we created vectorized defs for all the
8325 defs stored in OPRNDS in the previous iteration (previous copy).
8326 DR_CHAIN is then used as an input to vect_permute_store_chain().
8327 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8328 OPRNDS are of size 1. */
8329 for (i
= 0; i
< group_size
; i
++)
8331 vec_oprnd
= gvec_oprnds
[i
][j
];
8332 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8333 oprnds
[i
] = gvec_oprnds
[i
][j
];
8336 vec_mask
= vec_masks
[j
];
8339 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8340 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8341 vec_offset
= vec_offsets
[j
];
8343 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8347 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8351 /* Get an array into which we can store the individual vectors. */
8352 vec_array
= create_vector_array (vectype
, vec_num
);
8354 /* Invalidate the current contents of VEC_ARRAY. This should
8355 become an RTL clobber too, which prevents the vector registers
8356 from being upward-exposed. */
8357 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8359 /* Store the individual vectors into the array. */
8360 for (i
= 0; i
< vec_num
; i
++)
8362 vec_oprnd
= dr_chain
[i
];
8363 write_vector_array (vinfo
, stmt_info
,
8364 gsi
, vec_oprnd
, vec_array
, i
);
8367 tree final_mask
= NULL
;
8369 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8372 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8373 final_mask
, vec_mask
, gsi
);
8379 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8381 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8382 tree alias_ptr
= build_int_cst (ref_type
, align
);
8383 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8384 dataref_ptr
, alias_ptr
,
8385 final_mask
, vec_array
);
8390 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8391 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8392 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8394 gimple_call_set_lhs (call
, data_ref
);
8396 gimple_call_set_nothrow (call
, true);
8397 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8400 /* Record that VEC_ARRAY is now dead. */
8401 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
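          /* To illustrate the lane-store path above (a sketch; the actual
             instruction depends on the target, e.g. AArch64 st2/st3/st4):
             for vec_num == 2 and V4SI inputs v0 = {a0,a1,a2,a3} and
             v1 = {b0,b1,b2,b3}, STORE_LANES writes the interleaved sequence

               a0 b0 a1 b1 a2 b2 a3 b3

             to memory in one operation, so no separate VEC_PERM_EXPR
             statements are needed for the grouped store.  */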
8409 result_chain
.create (group_size
);
8411 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8412 gsi
, &result_chain
);
8415 stmt_vec_info next_stmt_info
= first_stmt_info
;
8416 for (i
= 0; i
< vec_num
; i
++)
8419 unsigned HOST_WIDE_INT align
;
8421 tree final_mask
= NULL_TREE
;
8423 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8425 vectype
, vec_num
* j
+ i
);
8427 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8428 final_mask
, vec_mask
, gsi
);
8430 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8432 tree scale
= size_int (gs_info
.scale
);
8435 call
= gimple_build_call_internal
8436 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8437 scale
, vec_oprnd
, final_mask
);
8439 call
= gimple_build_call_internal
8440 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8442 gimple_call_set_nothrow (call
, true);
8443 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8449 /* Bump the vector pointer. */
8450 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8451 gsi
, stmt_info
, bump
);
8454 vec_oprnd
= vec_oprnds
[i
];
8455 else if (grouped_store
)
8456 /* For grouped stores vectorized defs are interleaved in
8457 vect_permute_store_chain(). */
8458 vec_oprnd
= result_chain
[i
];
8460 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8461 if (alignment_support_scheme
== dr_aligned
)
8463 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8465 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8469 misalign
= misalignment
;
8470 if (dataref_offset
== NULL_TREE
8471 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8472 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8474 align
= least_bit_hwi (misalign
| align
);
8476 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8478 tree perm_mask
= perm_mask_for_reverse (vectype
);
8479 tree perm_dest
= vect_create_destination_var
8480 (vect_get_store_rhs (stmt_info
), vectype
);
8481 tree new_temp
= make_ssa_name (perm_dest
);
8483 /* Generate the permute statement. */
8485 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8486 vec_oprnd
, perm_mask
);
8487 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8489 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8490 vec_oprnd
= new_temp
;
8493 /* Arguments are ready. Create the new vector stmt. */
8496 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8498 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8500 final_mask
, vec_oprnd
);
8501 gimple_call_set_nothrow (call
, true);
8502 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8508 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8509 vec_num
* ncopies
, vec_num
* j
+ i
);
8510 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8511 machine_mode vmode
= TYPE_MODE (vectype
);
8512 opt_machine_mode new_ovmode
8513 = get_len_load_store_mode (vmode
, false);
8514 machine_mode new_vmode
= new_ovmode
.require ();
8515 /* Need conversion if it's wrapped with VnQI. */
8516 if (vmode
!= new_vmode
)
8519 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8522 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8524 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8526 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8528 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8533 signed char biasval
=
8534 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8536 tree bias
= build_int_cst (intQI_type_node
, biasval
);
8538 = gimple_build_call_internal (IFN_LEN_STORE
, 5, dataref_ptr
,
8539 ptr
, final_len
, vec_oprnd
,
8541 gimple_call_set_nothrow (call
, true);
8542 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8547 data_ref
= fold_build2 (MEM_REF
, vectype
,
8551 : build_int_cst (ref_type
, 0));
8552 if (alignment_support_scheme
== dr_aligned
)
8555 TREE_TYPE (data_ref
)
8556 = build_aligned_type (TREE_TYPE (data_ref
),
8557 align
* BITS_PER_UNIT
);
8558 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8559 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8560 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8566 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8567 if (!next_stmt_info
)
8574 *vec_stmt
= new_stmt
;
8575 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8579 for (i
= 0; i
< group_size
; ++i
)
8581 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8585 result_chain
.release ();
8586 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  machine_mode vmode = TYPE_MODE (vectype);
  gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
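/* Example usage (a sketch, mirroring how callers in this file build masks):
   to reverse the elements of a four-element VECTYPE one would do

     vec_perm_builder sel (4, 4, 1);
     for (unsigned i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and feed MASK to a VEC_PERM_EXPR; the checked variant asserts that the
   target can actually perform the { 3, 2, 1, 0 } permutation.  */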
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
                      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
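/* For illustration (a hypothetical sketch, not taken from a testcase):
   given a loop-invariant load whose address is computed inside the loop,

     loop:
       p_1 = &s->x;
       t_2 = *p_1;
       ... t_2 ...

   hoist_defs_of_uses moves the definition p_1 = &s->x to the loop preheader
   so that the invariant load t_2 = *p_1 itself can later be inserted on the
   preheader edge as well (see the VMAT_INVARIANT handling in
   vectorizable_load below).  */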
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   gimple **vec_stmt, slp_tree slp_node,
                   stmt_vector_for_cost *cost_vec)
{
8717 tree vec_dest
= NULL
;
8718 tree data_ref
= NULL
;
8719 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8720 class loop
*loop
= NULL
;
8721 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8722 bool nested_in_vect_loop
= false;
8727 tree dataref_ptr
= NULL_TREE
;
8728 tree dataref_offset
= NULL_TREE
;
8729 gimple
*ptr_incr
= NULL
;
8732 unsigned int group_size
;
8733 poly_uint64 group_gap_adj
;
8734 tree msq
= NULL_TREE
, lsq
;
8735 tree realignment_token
= NULL_TREE
;
8737 vec
<tree
> dr_chain
= vNULL
;
8738 bool grouped_load
= false;
8739 stmt_vec_info first_stmt_info
;
8740 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8741 bool compute_in_loop
= false;
8742 class loop
*at_loop
;
8744 bool slp
= (slp_node
!= NULL
);
8745 bool slp_perm
= false;
8746 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8749 gather_scatter_info gs_info
;
8751 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8753 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8756 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8760 if (!STMT_VINFO_DATA_REF (stmt_info
))
8763 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8764 int mask_index
= -1;
8765 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8767 scalar_dest
= gimple_assign_lhs (assign
);
8768 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8771 tree_code code
= gimple_assign_rhs_code (assign
);
8772 if (code
!= ARRAY_REF
8773 && code
!= BIT_FIELD_REF
8774 && code
!= INDIRECT_REF
8775 && code
!= COMPONENT_REF
8776 && code
!= IMAGPART_EXPR
8777 && code
!= REALPART_EXPR
8779 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8784 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8785 if (!call
|| !gimple_call_internal_p (call
))
8788 internal_fn ifn
= gimple_call_internal_fn (call
);
8789 if (!internal_load_fn_p (ifn
))
8792 scalar_dest
= gimple_call_lhs (call
);
8796 mask_index
= internal_fn_mask_index (ifn
);
8797 /* ??? For SLP the mask operand is always last. */
8798 if (mask_index
>= 0 && slp_node
)
8799 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
8801 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8802 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8806 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8807 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8811 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8812 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8813 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }
8837 /* Invalidate assumptions made by dependence analysis when vectorization
8838 on the unrolled body effectively re-orders stmts. */
8840 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8841 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8842 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8844 if (dump_enabled_p ())
8845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8846 "cannot perform implicit CSE when unrolling "
8847 "with negative dependence distance\n");
8851 elem_type
= TREE_TYPE (vectype
);
8852 mode
= TYPE_MODE (vectype
);
8854 /* FORNOW. In some cases can vectorize even if data-type not supported
8855 (e.g. - data copies). */
8856 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8858 if (dump_enabled_p ())
8859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8860 "Aligned load, but unsupported type.\n");
8864 /* Check if the load is a part of an interleaving chain. */
8865 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8867 grouped_load
= true;
8869 gcc_assert (!nested_in_vect_loop
);
8870 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8872 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8873 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8875 /* Refuse non-SLP vectorization of SLP-only groups. */
8876 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8878 if (dump_enabled_p ())
8879 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8880 "cannot vectorize load in non-SLP mode.\n");
8884 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8890 /* In BB vectorization we may not actually use a loaded vector
8891 accessing elements in excess of DR_GROUP_SIZE. */
8892 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8893 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8894 unsigned HOST_WIDE_INT nunits
;
8895 unsigned j
, k
, maxk
= 0;
8896 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8899 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8900 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8901 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8903 if (dump_enabled_p ())
8904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8905 "BB vectorization with gaps at the end of "
8906 "a load is not supported\n");
8913 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8916 if (dump_enabled_p ())
8917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8919 "unsupported load permutation\n");
8924 /* Invalidate assumptions made by dependence analysis when vectorization
8925 on the unrolled body effectively re-orders stmts. */
8926 if (!PURE_SLP_STMT (stmt_info
)
8927 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8928 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8929 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8931 if (dump_enabled_p ())
8932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8933 "cannot perform implicit CSE when performing "
8934 "group loads with negative dependence distance\n");
8941 vect_memory_access_type memory_access_type
;
8942 enum dr_alignment_support alignment_support_scheme
;
8945 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8946 ncopies
, &memory_access_type
, &poffset
,
8947 &alignment_support_scheme
, &misalignment
, &gs_info
))
8952 if (memory_access_type
== VMAT_CONTIGUOUS
)
8954 machine_mode vec_mode
= TYPE_MODE (vectype
);
8955 if (!VECTOR_MODE_P (vec_mode
)
8956 || !can_vec_mask_load_store_p (vec_mode
,
8957 TYPE_MODE (mask_vectype
), true))
8960 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8961 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8963 if (dump_enabled_p ())
8964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8965 "unsupported access type for masked load.\n");
8968 else if (memory_access_type
== VMAT_GATHER_SCATTER
8969 && gs_info
.ifn
== IFN_LAST
8972 if (dump_enabled_p ())
8973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8974 "unsupported masked emulated gather.\n");
8979 if (!vec_stmt
) /* transformation not required. */
8983 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8986 if (dump_enabled_p ())
8987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8988 "incompatible vector types for invariants\n");
8993 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8996 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8997 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8998 VLS_LOAD
, group_size
,
8999 memory_access_type
, &gs_info
,
9002 if (dump_enabled_p ()
9003 && memory_access_type
!= VMAT_ELEMENTWISE
9004 && memory_access_type
!= VMAT_GATHER_SCATTER
9005 && alignment_support_scheme
!= dr_aligned
)
9006 dump_printf_loc (MSG_NOTE
, vect_location
,
9007 "Vectorizing an unaligned access.\n");
9009 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9010 vinfo
->any_known_not_updated_vssa
= true;
9012 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
9013 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
9014 alignment_support_scheme
, misalignment
,
9015 &gs_info
, slp_node
, cost_vec
);
9020 gcc_assert (memory_access_type
9021 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
9023 if (dump_enabled_p ())
9024 dump_printf_loc (MSG_NOTE
, vect_location
,
9025 "transform load. ncopies = %d\n", ncopies
);
9029 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
9030 ensure_base_align (dr_info
);
9032 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
9034 vect_build_gather_load_calls (vinfo
,
9035 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
9039 if (memory_access_type
== VMAT_INVARIANT
)
9041 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
9042 /* If we have versioned for aliasing or the loop doesn't
9043 have any data dependencies that would preclude this,
9044 then we are sure this is a loop invariant load and
9045 thus we can insert it on the preheader edge. */
9046 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
9047 && !nested_in_vect_loop
9048 && hoist_defs_of_uses (stmt_info
, loop
));
9051 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
9052 if (dump_enabled_p ())
9053 dump_printf_loc (MSG_NOTE
, vect_location
,
9054 "hoisting out of the vectorized loop: %G",
9056 scalar_dest
= copy_ssa_name (scalar_dest
);
9057 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
9058 edge pe
= loop_preheader_edge (loop
);
9059 gphi
*vphi
= get_virtual_phi (loop
->header
);
9062 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
9064 vuse
= gimple_vuse (gsi_stmt (*gsi
));
9065 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
9066 gimple_set_vuse (new_stmt
, vuse
);
9067 gsi_insert_on_edge_immediate (pe
, new_stmt
);
9069 /* These copies are all equivalent, but currently the representation
9070 requires a separate STMT_VINFO_VEC_STMT for each one. */
9071 gimple_stmt_iterator gsi2
= *gsi
;
9073 for (j
= 0; j
< ncopies
; j
++)
9076 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9079 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9081 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9083 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9087 *vec_stmt
= new_stmt
;
9088 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9094 if (memory_access_type
== VMAT_ELEMENTWISE
9095 || memory_access_type
== VMAT_STRIDED_SLP
)
9097 gimple_stmt_iterator incr_gsi
;
9102 vec
<constructor_elt
, va_gc
> *v
= NULL
;
9103 tree stride_base
, stride_step
, alias_off
;
9104 /* Checked by get_load_store_type. */
9105 unsigned int const_nunits
= nunits
.to_constant ();
9106 unsigned HOST_WIDE_INT cst_offset
= 0;
9109 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
9110 gcc_assert (!nested_in_vect_loop
);
9114 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9115 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9119 first_stmt_info
= stmt_info
;
9120 first_dr_info
= dr_info
;
9122 if (slp
&& grouped_load
)
9124 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9125 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9131 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9132 * vect_get_place_in_interleaving_chain (stmt_info
,
9135 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9138 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9140 = fold_build_pointer_plus
9141 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9142 size_binop (PLUS_EXPR
,
9143 convert_to_ptrofftype (dr_offset
),
9144 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9145 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...
       */
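      /* A concrete sketch of the above, assuming a V4SI vectype and a
         scalar load with stride 3 (nloads == 4, one element per load):

           tmp0 = array[j + 0*3];
           tmp1 = array[j + 1*3];
           tmp2 = array[j + 2*3];
           tmp3 = array[j + 3*3];
           vectemp = {tmp0, tmp1, tmp2, tmp3};

         i.e. const_nunits scalar loads feeding a single CONSTRUCTOR, with
         the running offset bumped by stride_step between element loads.  */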
9163 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9164 build_int_cst (TREE_TYPE (stride_step
), vf
));
9166 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9168 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9169 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9170 create_iv (stride_base
, ivstep
, NULL
,
9171 loop
, &incr_gsi
, insert_after
,
9174 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9176 running_off
= offvar
;
9177 alias_off
= build_int_cst (ref_type
, 0);
9178 int nloads
= const_nunits
;
9180 tree ltype
= TREE_TYPE (vectype
);
9181 tree lvectype
= vectype
;
9182 auto_vec
<tree
> dr_chain
;
9183 if (memory_access_type
== VMAT_STRIDED_SLP
)
9185 if (group_size
< const_nunits
)
          /* First check if vec_init optab supports construction from vector
             elts directly.  Otherwise avoid emitting a constructor of
             vector elements by performing the loads using an integer type
             of the same size, constructing a vector of those and then
             re-interpreting it as the original vector type.  This avoids a
             huge runtime penalty due to the general inability to perform
             store forwarding from smaller stores to a larger load.  */
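          /* A hypothetical instance of this trick: for a group of two float
             elements out of a V4SF vectype, the loads can be done as two
             64-bit integers which are combined and then punned back:

               tmp0 = MEM <long long> [ptr];
               tmp1 = MEM <long long> [ptr + stride];
               ivec = {tmp0, tmp1};                       (vector(2) long long)
               vec  = VIEW_CONVERT_EXPR<vector(4) float>(ivec);

             so only two wide loads are emitted instead of a float-by-float
             construction that would hit the store-forwarding penalty.  */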
9196 = vector_vector_composition_type (vectype
,
9197 const_nunits
/ group_size
,
9199 if (vtype
!= NULL_TREE
)
9201 nloads
= const_nunits
/ group_size
;
9210 lnel
= const_nunits
;
9213 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9215 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9216 else if (nloads
== 1)
9221 /* For SLP permutation support we need to load the whole group,
9222 not only the number of vector stmts the permutation result
9226 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9228 unsigned int const_vf
= vf
.to_constant ();
9229 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9230 dr_chain
.create (ncopies
);
9233 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9235 unsigned int group_el
= 0;
9236 unsigned HOST_WIDE_INT
9237 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9238 for (j
= 0; j
< ncopies
; j
++)
9241 vec_alloc (v
, nloads
);
9242 gimple
*new_stmt
= NULL
;
9243 for (i
= 0; i
< nloads
; i
++)
9245 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9246 group_el
* elsz
+ cst_offset
);
9247 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9248 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9249 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9250 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9252 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9253 gimple_assign_lhs (new_stmt
));
9257 || group_el
== group_size
)
9259 tree newoff
= copy_ssa_name (running_off
);
9260 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9261 running_off
, stride_step
);
9262 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9264 running_off
= newoff
;
9270 tree vec_inv
= build_constructor (lvectype
, v
);
9271 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9272 vec_inv
, lvectype
, gsi
);
9273 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9274 if (lvectype
!= vectype
)
9276 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9278 build1 (VIEW_CONVERT_EXPR
,
9279 vectype
, new_temp
));
9280 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9287 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9289 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9294 *vec_stmt
= new_stmt
;
9295 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9301 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9307 if (memory_access_type
== VMAT_GATHER_SCATTER
9308 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9309 grouped_load
= false;
9313 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9314 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9315 /* For SLP vectorization we directly vectorize a subchain
9316 without permutation. */
9317 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9318 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9319 /* For BB vectorization always use the first stmt to base
9320 the data ref pointer on. */
9322 first_stmt_info_for_drptr
9323 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9325 /* Check if the chain of loads is already vectorized. */
9326 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9327 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9328 ??? But we can only do so if there is exactly one
9329 as we have no way to get at the rest. Leave the CSE
9331 ??? With the group load eventually participating
9332 in multiple different permutations (having multiple
9333 slp nodes which refer to the same group) the CSE
9334 is even wrong code. See PR56270. */
9337 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9340 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9343 /* VEC_NUM is the number of vect stmts to be created for this group. */
9346 grouped_load
= false;
9347 /* If an SLP permutation is from N elements to N elements,
9348 and if one vector holds a whole number of N, we can load
9349 the inputs to the permutation in the same way as an
9350 unpermuted sequence. In other cases we need to load the
9351 whole group, not only the number of vector stmts the
9352 permutation result fits in. */
9353 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9355 && (group_size
!= scalar_lanes
9356 || !multiple_p (nunits
, group_size
)))
9358 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9359 variable VF; see vect_transform_slp_perm_load. */
9360 unsigned int const_vf
= vf
.to_constant ();
9361 unsigned int const_nunits
= nunits
.to_constant ();
9362 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9363 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9367 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9369 = group_size
- scalar_lanes
;
9373 vec_num
= group_size
;
9375 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9379 first_stmt_info
= stmt_info
;
9380 first_dr_info
= dr_info
;
9381 group_size
= vec_num
= 1;
9383 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9385 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9388 gcc_assert (alignment_support_scheme
);
9389 vec_loop_masks
*loop_masks
9390 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9391 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9393 vec_loop_lens
*loop_lens
9394 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9395 ? &LOOP_VINFO_LENS (loop_vinfo
)
9398 /* Shouldn't go with length-based approach if fully masked. */
9399 gcc_assert (!loop_lens
|| !loop_masks
);
9401 /* Targets with store-lane instructions must not require explicit
9402 realignment. vect_supportable_dr_alignment always returns either
9403 dr_aligned or dr_unaligned_supported for masked operations. */
9404 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9407 || alignment_support_scheme
== dr_aligned
9408 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
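  /* A small concrete instance of the load permutations above, assuming two
     V4SI vector loads vx0 = {a0,a1,a2,a3} and vx1 = {a4,a5,a6,a7} covering
     an interleaved pair of scalar accesses (group_size == 2):

        even = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>;   i.e. {a0,a2,a4,a6}
        odd  = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>;   i.e. {a1,a3,a5,a7}

     which hands each scalar stmt of the chain its own de-interleaved vector,
     matching the { 0, 2, ..., i*2 } / { 1, 3, ..., i*2+1 } masks shown
     above.  */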
9510 if (nested_in_vect_loop
9511 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9512 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9514 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9515 compute_in_loop
= true;
9518 bool diff_first_stmt_info
9519 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9521 tree offset
= NULL_TREE
;
9522 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9523 || alignment_support_scheme
== dr_explicit_realign
)
9524 && !compute_in_loop
)
9526 /* If we have different first_stmt_info, we can't set up realignment
9527 here, since we can't guarantee first_stmt_info DR has been
9528 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9529 distance from first_stmt_info DR instead as below. */
9530 if (!diff_first_stmt_info
)
9531 msq
= vect_setup_realignment (vinfo
,
9532 first_stmt_info
, gsi
, &realignment_token
,
9533 alignment_support_scheme
, NULL_TREE
,
9535 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9537 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9538 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9540 gcc_assert (!first_stmt_info_for_drptr
);
9546 if (!known_eq (poffset
, 0))
9548 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9549 : size_int (poffset
));
9552 tree vec_offset
= NULL_TREE
;
9553 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9555 aggr_type
= NULL_TREE
;
9558 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9560 aggr_type
= elem_type
;
9561 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9562 &bump
, &vec_offset
);
9566 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9567 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9569 aggr_type
= vectype
;
9570 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9571 memory_access_type
);
9574 auto_vec
<tree
> vec_offsets
;
9575 auto_vec
<tree
> vec_masks
;
9579 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9582 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9583 &vec_masks
, mask_vectype
);
9585 tree vec_mask
= NULL_TREE
;
9586 poly_uint64 group_elt
= 0;
9587 for (j
= 0; j
< ncopies
; j
++)
9589 /* 1. Create the vector or array pointer update chain. */
9592 bool simd_lane_access_p
9593 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9594 if (simd_lane_access_p
9595 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9596 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9597 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9598 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9599 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9600 get_alias_set (TREE_TYPE (ref_type
)))
9601 && (alignment_support_scheme
== dr_aligned
9602 || alignment_support_scheme
== dr_unaligned_supported
))
9604 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9605 dataref_offset
= build_int_cst (ref_type
, 0);
9607 else if (diff_first_stmt_info
)
9610 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9611 aggr_type
, at_loop
, offset
, &dummy
,
9612 gsi
, &ptr_incr
, simd_lane_access_p
,
9614 /* Adjust the pointer by the difference to first_stmt. */
9615 data_reference_p ptrdr
9616 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9618 = fold_convert (sizetype
,
9619 size_binop (MINUS_EXPR
,
9620 DR_INIT (first_dr_info
->dr
),
9622 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9624 if (alignment_support_scheme
== dr_explicit_realign
)
9626 msq
= vect_setup_realignment (vinfo
,
9627 first_stmt_info_for_drptr
, gsi
,
9629 alignment_support_scheme
,
9630 dataref_ptr
, &at_loop
);
9631 gcc_assert (!compute_in_loop
);
9634 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9636 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9637 slp_node
, &gs_info
, &dataref_ptr
,
9642 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9644 offset
, &dummy
, gsi
, &ptr_incr
,
9645 simd_lane_access_p
, bump
);
9647 vec_mask
= vec_masks
[0];
9652 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9654 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9655 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9658 vec_mask
= vec_masks
[j
];
9661 if (grouped_load
|| slp_perm
)
9662 dr_chain
.create (vec_num
);
9664 gimple
*new_stmt
= NULL
;
9665 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9669 vec_array
= create_vector_array (vectype
, vec_num
);
9671 tree final_mask
= NULL_TREE
;
9673 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9676 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9677 final_mask
, vec_mask
, gsi
);
	      /* Emit:
		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
						VEC_MASK).  */
9685 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9686 tree alias_ptr
= build_int_cst (ref_type
, align
);
9687 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9688 dataref_ptr
, alias_ptr
,
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
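	      /* Illustrative note (not part of the generated code): a
		 load-lanes access reads VEC_NUM * NUNITS contiguous elements
		 and de-interleaves them into VEC_NUM lane vectors, e.g. for

		   for (i = 0; i < n; i++)
		     {
		       a[i] = s[2 * i];
		       b[i] = s[2 * i + 1];
		     }

		 one load-lanes call yields the vector of even elements and
		 the vector of odd elements, so the separate permutation step
		 used for other grouped loads is not needed here.  */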
9695 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9696 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9698 gimple_call_set_lhs (call
, vec_array
);
9699 gimple_call_set_nothrow (call
, true);
9700 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9703 /* Extract each vector into an SSA_NAME. */
9704 for (i
= 0; i
< vec_num
; i
++)
9706 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9708 dr_chain
.quick_push (new_temp
);
9711 /* Record the mapping between SSA_NAMEs and statements. */
9712 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9714 /* Record that VEC_ARRAY is now dead. */
9715 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9719 for (i
= 0; i
< vec_num
; i
++)
9721 tree final_mask
= NULL_TREE
;
9723 && memory_access_type
!= VMAT_INVARIANT
)
9724 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9726 vectype
, vec_num
* j
+ i
);
9728 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9729 final_mask
, vec_mask
, gsi
);
9731 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9732 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9733 gsi
, stmt_info
, bump
);
9735 /* 2. Create the vector-load in the loop. */
9736 switch (alignment_support_scheme
)
9739 case dr_unaligned_supported
:
9741 unsigned int misalign
;
9742 unsigned HOST_WIDE_INT align
;
9744 if (memory_access_type
== VMAT_GATHER_SCATTER
9745 && gs_info
.ifn
!= IFN_LAST
)
9747 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9748 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9749 tree zero
= build_zero_cst (vectype
);
9750 tree scale
= size_int (gs_info
.scale
);
9753 call
= gimple_build_call_internal
9754 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9755 vec_offset
, scale
, zero
, final_mask
);
9757 call
= gimple_build_call_internal
9758 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9759 vec_offset
, scale
, zero
);
9760 gimple_call_set_nothrow (call
, true);
9762 data_ref
= NULL_TREE
;
9765 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9767 /* Emulated gather-scatter. */
9768 gcc_assert (!final_mask
);
9769 unsigned HOST_WIDE_INT const_nunits
9770 = nunits
.to_constant ();
9771 unsigned HOST_WIDE_INT const_offset_nunits
9772 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9774 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9775 vec_alloc (ctor_elts
, const_nunits
);
9776 gimple_seq stmts
= NULL
;
		    /* We support offset vectors with more elements
		       than the data vector for now.  */
		    unsigned HOST_WIDE_INT factor
		      = const_offset_nunits / const_nunits;
		    vec_offset = vec_offsets[j / factor];
		    unsigned elt_offset = (j % factor) * const_nunits;
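		    /* Worked example (illustrative only): with a 4-element
		       data vector and an 8-element offset vector, FACTOR is 2,
		       so copy j uses vec_offsets[j / 2] and ELT_OFFSET selects
		       which half of that offset vector (0 or 4) it starts
		       at.  */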
9783 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9784 tree scale
= size_int (gs_info
.scale
);
9786 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9787 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9789 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9791 tree boff
= size_binop (MULT_EXPR
,
9792 TYPE_SIZE (idx_type
),
9795 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9796 idx_type
, vec_offset
,
9797 TYPE_SIZE (idx_type
),
9799 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9800 idx
= gimple_build (&stmts
, MULT_EXPR
,
9801 sizetype
, idx
, scale
);
9802 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9803 TREE_TYPE (dataref_ptr
),
9805 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9806 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9807 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9808 build_int_cst (ref_type
, 0));
9809 new_stmt
= gimple_build_assign (elt
, ref
);
9810 gimple_set_vuse (new_stmt
,
9811 gimple_vuse (gsi_stmt (*gsi
)));
9812 gimple_seq_add_stmt (&stmts
, new_stmt
);
9813 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9815 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9816 new_stmt
= gimple_build_assign (NULL_TREE
,
9818 (vectype
, ctor_elts
));
9819 data_ref
= NULL_TREE
;
9824 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9825 if (alignment_support_scheme
== dr_aligned
)
9827 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9829 align
= dr_alignment
9830 (vect_dr_behavior (vinfo
, first_dr_info
));
9834 misalign
= misalignment
;
9835 if (dataref_offset
== NULL_TREE
9836 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9837 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9839 align
= least_bit_hwi (misalign
| align
);
9843 tree ptr
= build_int_cst (ref_type
,
9844 align
* BITS_PER_UNIT
);
9846 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9849 gimple_call_set_nothrow (call
, true);
9851 data_ref
= NULL_TREE
;
9853 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9856 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9859 tree ptr
= build_int_cst (ref_type
,
9860 align
* BITS_PER_UNIT
);
9862 machine_mode vmode
= TYPE_MODE (vectype
);
9863 opt_machine_mode new_ovmode
9864 = get_len_load_store_mode (vmode
, true);
9865 machine_mode new_vmode
= new_ovmode
.require ();
9866 tree qi_type
= unsigned_intQI_type_node
;
9868 signed char biasval
=
9869 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9871 tree bias
= build_int_cst (intQI_type_node
, biasval
);
9874 = gimple_build_call_internal (IFN_LEN_LOAD
, 4,
9877 gimple_call_set_nothrow (call
, true);
9879 data_ref
= NULL_TREE
;
9881 /* Need conversion if it's wrapped with VnQI. */
9882 if (vmode
!= new_vmode
)
9885 = build_vector_type_for_mode (qi_type
, new_vmode
);
9886 tree var
= vect_get_new_ssa_name (new_vtype
,
9888 gimple_set_lhs (call
, var
);
9889 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9891 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9893 = gimple_build_assign (vec_dest
,
9894 VIEW_CONVERT_EXPR
, op
);
9899 tree ltype
= vectype
;
9900 tree new_vtype
= NULL_TREE
;
9901 unsigned HOST_WIDE_INT gap
9902 = DR_GROUP_GAP (first_stmt_info
);
9903 unsigned int vect_align
9904 = vect_known_alignment_in_bytes (first_dr_info
,
9906 unsigned int scalar_dr_size
9907 = vect_get_scalar_dr_size (first_dr_info
);
		  /* If there's no peeling for gaps but we have a gap
		     with slp loads then load the lower half of the
		     vector only.  See get_group_load_store_type for
		     when we apply this optimization.  */
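		  /* Illustrative example: with a group of 4 elements, a gap
		     of 2 and a 4-element vector only the first 2 elements
		     are live, so a half-width load avoids reading into the
		     gap and the constructor built below pads the missing
		     half with zeros.  */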
9914 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9916 && known_eq (nunits
, (group_size
- gap
) * 2)
9917 && known_eq (nunits
, group_size
)
9918 && gap
>= (vect_align
/ scalar_dr_size
))
9922 = vector_vector_composition_type (vectype
, 2,
9924 if (new_vtype
!= NULL_TREE
)
9928 = (dataref_offset
? dataref_offset
9929 : build_int_cst (ref_type
, 0));
9930 if (ltype
!= vectype
9931 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9933 unsigned HOST_WIDE_INT gap_offset
9934 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9935 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9936 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9939 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9940 if (alignment_support_scheme
== dr_aligned
)
9943 TREE_TYPE (data_ref
)
9944 = build_aligned_type (TREE_TYPE (data_ref
),
9945 align
* BITS_PER_UNIT
);
9946 if (ltype
!= vectype
)
9948 vect_copy_ref_info (data_ref
,
9949 DR_REF (first_dr_info
->dr
));
9950 tree tem
= make_ssa_name (ltype
);
9951 new_stmt
= gimple_build_assign (tem
, data_ref
);
9952 vect_finish_stmt_generation (vinfo
, stmt_info
,
9955 vec
<constructor_elt
, va_gc
> *v
;
9957 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9959 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9960 build_zero_cst (ltype
));
9961 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9965 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9966 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9967 build_zero_cst (ltype
));
9969 gcc_assert (new_vtype
!= NULL_TREE
);
9970 if (new_vtype
== vectype
)
9971 new_stmt
= gimple_build_assign (
9972 vec_dest
, build_constructor (vectype
, v
));
9975 tree new_vname
= make_ssa_name (new_vtype
);
9976 new_stmt
= gimple_build_assign (
9977 new_vname
, build_constructor (new_vtype
, v
));
9978 vect_finish_stmt_generation (vinfo
, stmt_info
,
9980 new_stmt
= gimple_build_assign (
9981 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9988 case dr_explicit_realign
:
9992 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9994 if (compute_in_loop
)
9995 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9997 dr_explicit_realign
,
10000 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10001 ptr
= copy_ssa_name (dataref_ptr
);
10003 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10004 // For explicit realign the target alignment should be
10005 // known at compile time.
10006 unsigned HOST_WIDE_INT align
=
10007 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10008 new_stmt
= gimple_build_assign
10009 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
10011 (TREE_TYPE (dataref_ptr
),
10012 -(HOST_WIDE_INT
) align
));
10013 vect_finish_stmt_generation (vinfo
, stmt_info
,
10016 = build2 (MEM_REF
, vectype
, ptr
,
10017 build_int_cst (ref_type
, 0));
10018 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10019 vec_dest
= vect_create_destination_var (scalar_dest
,
10021 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10022 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10023 gimple_assign_set_lhs (new_stmt
, new_temp
);
10024 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
10025 vect_finish_stmt_generation (vinfo
, stmt_info
,
10029 bump
= size_binop (MULT_EXPR
, vs
,
10030 TYPE_SIZE_UNIT (elem_type
));
10031 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
10032 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
10034 new_stmt
= gimple_build_assign
10035 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
10037 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
10038 if (TREE_CODE (ptr
) == SSA_NAME
)
10039 ptr
= copy_ssa_name (ptr
, new_stmt
);
10041 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
10042 gimple_assign_set_lhs (new_stmt
, ptr
);
10043 vect_finish_stmt_generation (vinfo
, stmt_info
,
10046 = build2 (MEM_REF
, vectype
, ptr
,
10047 build_int_cst (ref_type
, 0));
10050 case dr_explicit_realign_optimized
:
10052 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10053 new_temp
= copy_ssa_name (dataref_ptr
);
10055 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10056 // We should only be doing this if we know the target
10057 // alignment at compile time.
10058 unsigned HOST_WIDE_INT align
=
10059 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10060 new_stmt
= gimple_build_assign
10061 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
10062 build_int_cst (TREE_TYPE (dataref_ptr
),
10063 -(HOST_WIDE_INT
) align
));
10064 vect_finish_stmt_generation (vinfo
, stmt_info
,
10067 = build2 (MEM_REF
, vectype
, new_temp
,
10068 build_int_cst (ref_type
, 0));
10072 gcc_unreachable ();
10074 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10075 /* DATA_REF is null if we've already built the statement. */
10078 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10079 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10081 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10082 gimple_set_lhs (new_stmt
, new_temp
);
10083 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.
	     Create in loop:
	       vec_dest = realign_load (msq, lsq, realignment_token)  */
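	  /* Sketch of the realignment scheme (illustrative, not the exact
	     generated code): MSQ is loaded from the access address rounded
	     down to the target alignment, LSQ from the next aligned slot,
	     and REALIGN_LOAD selects the elements the unaligned access
	     would have read:

	       msq = *(addr & -align);
	       lsq = *((addr & -align) + vecsize);
	       vec = realign_load (msq, lsq, realignment_token);

	     For dr_explicit_realign_optimized the LSQ of one iteration is
	     reused as the MSQ of the next via the PHI set up earlier.  */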
10088 if (alignment_support_scheme
== dr_explicit_realign_optimized
10089 || alignment_support_scheme
== dr_explicit_realign
)
10091 lsq
= gimple_assign_lhs (new_stmt
);
10092 if (!realignment_token
)
10093 realignment_token
= dataref_ptr
;
10094 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10095 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
10096 msq
, lsq
, realignment_token
);
10097 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10098 gimple_assign_set_lhs (new_stmt
, new_temp
);
10099 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10101 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10104 if (i
== vec_num
- 1 && j
== ncopies
- 1)
10105 add_phi_arg (phi
, lsq
,
10106 loop_latch_edge (containing_loop
),
10112 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10114 tree perm_mask
= perm_mask_for_reverse (vectype
);
10115 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
10116 perm_mask
, stmt_info
, gsi
);
10117 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10120 /* Collect vector loads and later create their permutation in
10121 vect_transform_grouped_load (). */
10122 if (grouped_load
|| slp_perm
)
10123 dr_chain
.quick_push (new_temp
);
10125 /* Store vector loads in the corresponding SLP_NODE. */
10126 if (slp
&& !slp_perm
)
10127 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
	  /* With SLP permutation we load the gaps as well; without it
	     we need to skip the gaps once we have fully loaded all
	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10132 group_elt
+= nunits
;
10133 if (maybe_ne (group_gap_adj
, 0U)
10135 && known_eq (group_elt
, group_size
- group_gap_adj
))
10137 poly_wide_int bump_val
10138 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10140 if (tree_int_cst_sgn
10141 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10142 bump_val
= -bump_val
;
10143 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10144 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10145 gsi
, stmt_info
, bump
);
10149 /* Bump the vector pointer to account for a gap or for excess
10150 elements loaded for a permuted SLP load. */
10151 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10153 poly_wide_int bump_val
10154 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10156 if (tree_int_cst_sgn
10157 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10158 bump_val
= -bump_val
;
10159 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10160 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10165 if (slp
&& !slp_perm
)
	      /* For SLP we know we've seen all possible uses of dr_chain so
		 direct vect_transform_slp_perm_load to DCE the unused parts.
		 ??? This is a hack to prevent compile-time issues as seen
		 in PR101120 and friends.  */
10175 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10176 gsi
, vf
, false, &n_perms
,
10184 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10185 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10187 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10191 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10194 dr_chain
.release ();
10197 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */
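/* An illustrative example (not from any particular test case): for

     _1 = a_2 < b_3;
     x_4 = _1 ? c_5 : d_6;

   the condition operand is the scalar boolean SSA name _1, while for

     x_4 = a_2 < b_3 ? c_5 : d_6;

   it is the embedded comparison itself; both forms are handled below.  */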
10216 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10217 slp_tree slp_node
, tree
*comp_vectype
,
10218 enum vect_def_type
*dts
, tree vectype
)
10221 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10225 if (TREE_CODE (cond
) == SSA_NAME
10226 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10228 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10229 &slp_op
, &dts
[0], comp_vectype
)
10231 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10236 if (!COMPARISON_CLASS_P (cond
))
10239 lhs
= TREE_OPERAND (cond
, 0);
10240 rhs
= TREE_OPERAND (cond
, 1);
10242 if (TREE_CODE (lhs
) == SSA_NAME
)
10244 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10245 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10248 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10249 || TREE_CODE (lhs
) == FIXED_CST
)
10250 dts
[0] = vect_constant_def
;
10254 if (TREE_CODE (rhs
) == SSA_NAME
)
10256 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10257 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10260 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10261 || TREE_CODE (rhs
) == FIXED_CST
)
10262 dts
[1] = vect_constant_def
;
10266 if (vectype1
&& vectype2
10267 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10268 TYPE_VECTOR_SUBPARTS (vectype2
)))
10271 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10272 /* Invariant comparison. */
10273 if (! *comp_vectype
)
10275 tree scalar_type
= TREE_TYPE (lhs
);
10276 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10277 *comp_vectype
= truth_type_for (vectype
);
10280 /* If we can widen the comparison to match vectype do so. */
10281 if (INTEGRAL_TYPE_P (scalar_type
)
10283 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10284 TYPE_SIZE (TREE_TYPE (vectype
))))
10285 scalar_type
= build_nonstandard_integer_type
10286 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10287 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT,
   and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
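/* A minimal sketch of the transformation (illustrative only): the scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, per vector copy,

     vcmp_6 = va_7 < vb_8;
     vx_9 = VEC_COND_EXPR <vcmp_6, vc_10, vd_11>;

   where the comparison may instead be expressed with bit operations for
   vector-boolean operands and may additionally be ANDed with a loop mask
   when partial vectors are in use.  */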
10307 vectorizable_condition (vec_info
*vinfo
,
10308 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10310 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10312 tree scalar_dest
= NULL_TREE
;
10313 tree vec_dest
= NULL_TREE
;
10314 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10315 tree then_clause
, else_clause
;
10316 tree comp_vectype
= NULL_TREE
;
10317 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10318 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10321 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10322 enum vect_def_type dts
[4]
10323 = {vect_unknown_def_type
, vect_unknown_def_type
,
10324 vect_unknown_def_type
, vect_unknown_def_type
};
10328 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10330 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10331 vec
<tree
> vec_oprnds0
= vNULL
;
10332 vec
<tree
> vec_oprnds1
= vNULL
;
10333 vec
<tree
> vec_oprnds2
= vNULL
;
10334 vec
<tree
> vec_oprnds3
= vNULL
;
10336 bool masked
= false;
10338 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10341 /* Is vectorizable conditional operation? */
10342 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10346 code
= gimple_assign_rhs_code (stmt
);
10347 if (code
!= COND_EXPR
)
10350 stmt_vec_info reduc_info
= NULL
;
10351 int reduc_index
= -1;
10352 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10354 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10357 if (STMT_SLP_TYPE (stmt_info
))
10359 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10360 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10361 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10362 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10363 || reduc_index
!= -1);
10367 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10371 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10372 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10377 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10381 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10385 gcc_assert (ncopies
>= 1);
10386 if (for_reduction
&& ncopies
> 1)
10387 return false; /* FORNOW */
10389 cond_expr
= gimple_assign_rhs1 (stmt
);
10391 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10392 &comp_vectype
, &dts
[0], vectype
)
10396 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10397 slp_tree then_slp_node
, else_slp_node
;
10398 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10399 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10401 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10402 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10405 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10408 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10411 masked
= !COMPARISON_CLASS_P (cond_expr
);
10412 vec_cmp_type
= truth_type_for (comp_vectype
);
10414 if (vec_cmp_type
== NULL_TREE
)
10417 cond_code
= TREE_CODE (cond_expr
);
10420 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10421 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
  /* For conditional reductions, the "then" value needs to be the candidate
     value calculated by this iteration while the "else" value needs to be
     the result carried over from previous iterations.  If the COND_EXPR
     is the other way around, we need to swap it.  */
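  /* Illustrative example: a conditional reduction written as

       res_1 = a_2 < b_3 ? res_4 : val_5;

     carries the previous result in the "then" position, so the comparison
     is inverted (or, if it cannot be inverted safely, its result is negated
     later) and the then/else operands are swapped to obtain the canonical

       res_1 = a_2 >= b_3 ? val_5 : res_4;  */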
10428 bool must_invert_cmp_result
= false;
10429 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10432 must_invert_cmp_result
= true;
10435 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10436 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10437 if (new_code
== ERROR_MARK
)
10438 must_invert_cmp_result
= true;
10441 cond_code
= new_code
;
10442 /* Make sure we don't accidentally use the old condition. */
10443 cond_expr
= NULL_TREE
;
10446 std::swap (then_clause
, else_clause
);
10449 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
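      /* For example (illustrative only), a GT_EXPR on two boolean masks,
	 m1 > m2, is rewritten as m1 & ~m2: bitop1 is BIT_NOT_EXPR applied to
	 the second operand and bitop2 is BIT_AND_EXPR combining the result
	 with the first operand.  An EQ_EXPR becomes ~(m1 ^ m2), i.e.
	 BIT_XOR_EXPR followed by BIT_NOT_EXPR.  */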
10460 bitop1
= BIT_NOT_EXPR
;
10461 bitop2
= BIT_AND_EXPR
;
10464 bitop1
= BIT_NOT_EXPR
;
10465 bitop2
= BIT_IOR_EXPR
;
10468 bitop1
= BIT_NOT_EXPR
;
10469 bitop2
= BIT_AND_EXPR
;
10470 std::swap (cond_expr0
, cond_expr1
);
10473 bitop1
= BIT_NOT_EXPR
;
10474 bitop2
= BIT_IOR_EXPR
;
10475 std::swap (cond_expr0
, cond_expr1
);
10478 bitop1
= BIT_XOR_EXPR
;
10481 bitop1
= BIT_XOR_EXPR
;
10482 bitop2
= BIT_NOT_EXPR
;
10487 cond_code
= SSA_NAME
;
10490 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10491 && reduction_type
== EXTRACT_LAST_REDUCTION
10492 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10494 if (dump_enabled_p ())
10495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10496 "reduction comparison operation not supported.\n");
10502 if (bitop1
!= NOP_EXPR
)
10504 machine_mode mode
= TYPE_MODE (comp_vectype
);
10507 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10508 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10511 if (bitop2
!= NOP_EXPR
)
10513 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10515 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10520 vect_cost_for_stmt kind
= vector_stmt
;
10521 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10522 /* Count one reduction-like operation per vector. */
10523 kind
= vec_to_scalar
;
10524 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10528 && (!vect_maybe_update_slp_op_vectype
10529 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10531 && !vect_maybe_update_slp_op_vectype
10532 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10533 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10534 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10536 if (dump_enabled_p ())
10537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10538 "incompatible vector types for invariants\n");
10542 if (loop_vinfo
&& for_reduction
10543 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10545 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10546 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10547 ncopies
* vec_num
, vectype
, NULL
);
10548 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10549 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10551 if (dump_enabled_p ())
10552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10553 "conditional reduction prevents the use"
10554 " of partial vectors.\n");
10555 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10559 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10560 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10568 scalar_dest
= gimple_assign_lhs (stmt
);
10569 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10570 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10572 bool swap_cond_operands
= false;
  /* See whether another part of the vectorized code applies a loop
     mask to the condition, or to its inverse.  */
10577 vec_loop_masks
*masks
= NULL
;
10578 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10580 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10581 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10584 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10585 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10586 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10589 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10590 tree_code orig_code
= cond
.code
;
10591 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10592 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10594 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10595 cond_code
= cond
.code
;
10596 swap_cond_operands
= true;
	      /* Try the inverse of the current mask.  We check if the
		 inverse mask is live and if so we generate a negate of
		 the current mask such that we still honor NaNs.  */
10603 cond
.inverted_p
= true;
10604 cond
.code
= orig_code
;
10605 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10607 bitop1
= orig_code
;
10608 bitop2
= BIT_NOT_EXPR
;
10609 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10610 cond_code
= cond
.code
;
10611 swap_cond_operands
= true;
10618 /* Handle cond expr. */
10620 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10621 cond_expr
, &vec_oprnds0
, comp_vectype
,
10622 then_clause
, &vec_oprnds2
, vectype
,
10623 reduction_type
!= EXTRACT_LAST_REDUCTION
10624 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10626 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10627 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10628 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10629 then_clause
, &vec_oprnds2
, vectype
,
10630 reduction_type
!= EXTRACT_LAST_REDUCTION
10631 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10633 /* Arguments are ready. Create the new vector stmt. */
10634 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10636 vec_then_clause
= vec_oprnds2
[i
];
10637 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10638 vec_else_clause
= vec_oprnds3
[i
];
10640 if (swap_cond_operands
)
10641 std::swap (vec_then_clause
, vec_else_clause
);
10644 vec_compare
= vec_cond_lhs
;
10647 vec_cond_rhs
= vec_oprnds1
[i
];
10648 if (bitop1
== NOP_EXPR
)
10650 gimple_seq stmts
= NULL
;
10651 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10652 vec_cond_lhs
, vec_cond_rhs
);
10653 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10657 new_temp
= make_ssa_name (vec_cmp_type
);
10659 if (bitop1
== BIT_NOT_EXPR
)
10660 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10664 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10666 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10667 if (bitop2
== NOP_EXPR
)
10668 vec_compare
= new_temp
;
10669 else if (bitop2
== BIT_NOT_EXPR
)
10671 /* Instead of doing ~x ? y : z do x ? z : y. */
10672 vec_compare
= new_temp
;
10673 std::swap (vec_then_clause
, vec_else_clause
);
10677 vec_compare
= make_ssa_name (vec_cmp_type
);
10679 = gimple_build_assign (vec_compare
, bitop2
,
10680 vec_cond_lhs
, new_temp
);
10681 vect_finish_stmt_generation (vinfo
, stmt_info
,
	  /* If we decided to apply a loop mask to the result of the vector
	     comparison, AND the comparison with the mask now.  Later passes
	     should then be able to reuse the AND results between multiple
	     vector statements.

	     For example:
	     for (int i = 0; i < 100; ++i)
	       x[i] = y[i] ? z[i] : 10;

	     results in the following optimized GIMPLE:

	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
					       vect_iftmp.11_47, { 10, ... }>;

	     instead of using masked and unmasked forms of
	     vec != { 0, ... } (masked in the MASK_LOAD,
	     unmasked in the VEC_COND_EXPR).  */
	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
	     in cases where that's necessary.  */
10712 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10714 if (!is_gimple_val (vec_compare
))
10716 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10717 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10719 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10720 vec_compare
= vec_compare_name
;
10723 if (must_invert_cmp_result
)
10725 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10726 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10729 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10730 vec_compare
= vec_compare_name
;
10736 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10738 tree tmp2
= make_ssa_name (vec_cmp_type
);
10740 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10742 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10743 vec_compare
= tmp2
;
10748 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10750 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10751 tree lhs
= gimple_get_lhs (old_stmt
);
10752 new_stmt
= gimple_build_call_internal
10753 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10755 gimple_call_set_lhs (new_stmt
, lhs
);
10756 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10757 if (old_stmt
== gsi_stmt (*gsi
))
10758 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
	      /* In this case we're moving the definition to later in the
		 block.  That doesn't matter because the only uses of the
		 lhs are in phi statements.  */
10764 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10765 gsi_remove (&old_gsi
, true);
10766 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10771 new_temp
= make_ssa_name (vec_dest
);
10772 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10773 vec_then_clause
, vec_else_clause
);
10774 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10777 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10779 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10783 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10785 vec_oprnds0
.release ();
10786 vec_oprnds1
.release ();
10787 vec_oprnds2
.release ();
10788 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
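/* A minimal sketch (illustrative only): the scalar comparison

     b_1 = a_2 < c_3;

   whose result is a boolean is vectorized into a vector-boolean (mask)
   producing statement

     vb_4 = va_5 < vc_6;

   or, when the operands are themselves vector booleans, into the
   equivalent bit operations chosen below.  */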
10802 vectorizable_comparison (vec_info
*vinfo
,
10803 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10805 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10807 tree lhs
, rhs1
, rhs2
;
10808 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10809 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10810 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10812 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10813 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10815 poly_uint64 nunits
;
10817 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10819 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10820 vec
<tree
> vec_oprnds0
= vNULL
;
10821 vec
<tree
> vec_oprnds1
= vNULL
;
10825 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10828 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10831 mask_type
= vectype
;
10832 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10837 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10839 gcc_assert (ncopies
>= 1);
10840 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10843 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10847 code
= gimple_assign_rhs_code (stmt
);
10849 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10852 slp_tree slp_rhs1
, slp_rhs2
;
10853 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10854 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10857 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10858 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10861 if (vectype1
&& vectype2
10862 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10863 TYPE_VECTOR_SUBPARTS (vectype2
)))
10866 vectype
= vectype1
? vectype1
: vectype2
;
10868 /* Invariant comparison. */
10871 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10872 vectype
= mask_type
;
10874 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10876 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10879 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10882 /* Can't compare mask and non-mask types. */
10883 if (vectype1
&& vectype2
10884 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
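  /* For example (illustrative only): on vector booleans m1 < m2 uses the
     same bit operations as GT_EXPR with the operands swapped (SWAP_P below),
     giving m2 & ~m1, while m1 == m2 becomes ~(m1 ^ m2).  */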
10894 bool swap_p
= false;
10895 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10897 if (code
== GT_EXPR
)
10899 bitop1
= BIT_NOT_EXPR
;
10900 bitop2
= BIT_AND_EXPR
;
10902 else if (code
== GE_EXPR
)
10904 bitop1
= BIT_NOT_EXPR
;
10905 bitop2
= BIT_IOR_EXPR
;
10907 else if (code
== LT_EXPR
)
10909 bitop1
= BIT_NOT_EXPR
;
10910 bitop2
= BIT_AND_EXPR
;
10913 else if (code
== LE_EXPR
)
10915 bitop1
= BIT_NOT_EXPR
;
10916 bitop2
= BIT_IOR_EXPR
;
10921 bitop1
= BIT_XOR_EXPR
;
10922 if (code
== EQ_EXPR
)
10923 bitop2
= BIT_NOT_EXPR
;
10929 if (bitop1
== NOP_EXPR
)
10931 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10936 machine_mode mode
= TYPE_MODE (vectype
);
10939 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10940 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10943 if (bitop2
!= NOP_EXPR
)
10945 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10946 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10951 /* Put types on constant and invariant SLP children. */
10953 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10954 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10956 if (dump_enabled_p ())
10957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10958 "incompatible vector types for invariants\n");
10962 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10963 vect_model_simple_cost (vinfo
, stmt_info
,
10964 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10965 dts
, ndts
, slp_node
, cost_vec
);
10972 lhs
= gimple_assign_lhs (stmt
);
10973 mask
= vect_create_destination_var (lhs
, mask_type
);
10975 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10976 rhs1
, &vec_oprnds0
, vectype
,
10977 rhs2
, &vec_oprnds1
, vectype
);
10979 std::swap (vec_oprnds0
, vec_oprnds1
);
10981 /* Arguments are ready. Create the new vector stmt. */
10982 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10985 vec_rhs2
= vec_oprnds1
[i
];
10987 new_temp
= make_ssa_name (mask
);
10988 if (bitop1
== NOP_EXPR
)
10990 new_stmt
= gimple_build_assign (new_temp
, code
,
10991 vec_rhs1
, vec_rhs2
);
10992 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10996 if (bitop1
== BIT_NOT_EXPR
)
10997 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10999 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
11001 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11002 if (bitop2
!= NOP_EXPR
)
11004 tree res
= make_ssa_name (mask
);
11005 if (bitop2
== BIT_NOT_EXPR
)
11006 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
11008 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
11010 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11014 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
11016 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11020 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11022 vec_oprnds0
.release ();
11023 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
11034 can_vectorize_live_stmts (vec_info
*vinfo
,
11035 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11036 slp_tree slp_node
, slp_instance slp_node_instance
,
11038 stmt_vector_for_cost
*cost_vec
)
11042 stmt_vec_info slp_stmt_info
;
11044 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
11046 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
11047 && !vectorizable_live_operation (vinfo
,
11048 slp_stmt_info
, gsi
, slp_node
,
11049 slp_node_instance
, i
,
11050 vec_stmt_p
, cost_vec
))
11054 else if (STMT_VINFO_LIVE_P (stmt_info
)
11055 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
11056 slp_node
, slp_node_instance
, -1,
11057 vec_stmt_p
, cost_vec
))
11063 /* Make sure the statement is vectorizable. */
11066 vect_analyze_stmt (vec_info
*vinfo
,
11067 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
11068 slp_tree node
, slp_instance node_instance
,
11069 stmt_vector_for_cost
*cost_vec
)
11071 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11072 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
11074 gimple_seq pattern_def_seq
;
11076 if (dump_enabled_p ())
11077 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
11080 if (gimple_has_volatile_ops (stmt_info
->stmt
))
11081 return opt_result::failure_at (stmt_info
->stmt
,
11083 " stmt has volatile operands: %G\n",
11086 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11088 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
11090 gimple_stmt_iterator si
;
11092 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
11094 stmt_vec_info pattern_def_stmt_info
11095 = vinfo
->lookup_stmt (gsi_stmt (si
));
11096 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
11097 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
11099 /* Analyze def stmt of STMT if it's a pattern stmt. */
11100 if (dump_enabled_p ())
11101 dump_printf_loc (MSG_NOTE
, vect_location
,
11102 "==> examining pattern def statement: %G",
11103 pattern_def_stmt_info
->stmt
);
11106 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
11107 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, so we don't analyze pattern stmts here; they will already be
     part of an SLP instance.  */
11129 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11130 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11131 && !STMT_VINFO_LIVE_P (stmt_info
))
11133 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11134 && pattern_stmt_info
11135 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11136 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11138 /* Analyze PATTERN_STMT instead of the original stmt. */
11139 stmt_info
= pattern_stmt_info
;
11140 if (dump_enabled_p ())
11141 dump_printf_loc (MSG_NOTE
, vect_location
,
11142 "==> examining pattern statement: %G",
11147 if (dump_enabled_p ())
11148 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11150 return opt_result::success ();
11153 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11155 && pattern_stmt_info
11156 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11157 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11159 /* Analyze PATTERN_STMT too. */
11160 if (dump_enabled_p ())
11161 dump_printf_loc (MSG_NOTE
, vect_location
,
11162 "==> examining pattern statement: %G",
11163 pattern_stmt_info
->stmt
);
11166 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11167 node_instance
, cost_vec
);
11172 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11174 case vect_internal_def
:
11177 case vect_reduction_def
:
11178 case vect_nested_cycle
:
11179 gcc_assert (!bb_vinfo
11180 && (relevance
== vect_used_in_outer
11181 || relevance
== vect_used_in_outer_by_reduction
11182 || relevance
== vect_used_by_reduction
11183 || relevance
== vect_unused_in_scope
11184 || relevance
== vect_used_only_live
));
11187 case vect_induction_def
:
11188 case vect_first_order_recurrence
:
11189 gcc_assert (!bb_vinfo
);
11192 case vect_constant_def
:
11193 case vect_external_def
:
11194 case vect_unknown_def_type
:
11196 gcc_unreachable ();
11199 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11201 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11203 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11205 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11206 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11207 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11208 *need_to_vectorize
= true;
11211 if (PURE_SLP_STMT (stmt_info
) && !node
)
11213 if (dump_enabled_p ())
11214 dump_printf_loc (MSG_NOTE
, vect_location
,
11215 "handled only by SLP analysis\n");
11216 return opt_result::success ();
11221 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11222 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
11226 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11227 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11229 || vectorizable_conversion (vinfo
, stmt_info
,
11230 NULL
, NULL
, node
, cost_vec
)
11231 || vectorizable_operation (vinfo
, stmt_info
,
11232 NULL
, NULL
, node
, cost_vec
)
11233 || vectorizable_assignment (vinfo
, stmt_info
,
11234 NULL
, NULL
, node
, cost_vec
)
11235 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11236 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11237 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11238 node
, node_instance
, cost_vec
)
11239 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11240 NULL
, node
, cost_vec
)
11241 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11242 || vectorizable_condition (vinfo
, stmt_info
,
11243 NULL
, NULL
, node
, cost_vec
)
11244 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11246 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11247 stmt_info
, NULL
, node
)
11248 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11249 stmt_info
, NULL
, node
, cost_vec
));
11253 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11254 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11255 NULL
, NULL
, node
, cost_vec
)
11256 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11258 || vectorizable_shift (vinfo
, stmt_info
,
11259 NULL
, NULL
, node
, cost_vec
)
11260 || vectorizable_operation (vinfo
, stmt_info
,
11261 NULL
, NULL
, node
, cost_vec
)
11262 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11264 || vectorizable_load (vinfo
, stmt_info
,
11265 NULL
, NULL
, node
, cost_vec
)
11266 || vectorizable_store (vinfo
, stmt_info
,
11267 NULL
, NULL
, node
, cost_vec
)
11268 || vectorizable_condition (vinfo
, stmt_info
,
11269 NULL
, NULL
, node
, cost_vec
)
11270 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11272 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11276 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11279 return opt_result::failure_at (stmt_info
->stmt
,
11281 " relevant stmt not supported: %G",
  /* Stmts that are (also) "live" (i.e. used outside the loop)
     need extra handling, except for vectorizable reductions.  */
11287 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11288 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11289 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11290 stmt_info
, NULL
, node
, node_instance
,
11292 return opt_result::failure_at (stmt_info
->stmt
,
11294 " live stmt not supported: %G",
11297 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11306 vect_transform_stmt (vec_info
*vinfo
,
11307 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11308 slp_tree slp_node
, slp_instance slp_node_instance
)
11310 bool is_store
= false;
11311 gimple
*vec_stmt
= NULL
;
11314 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11316 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11318 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11320 switch (STMT_VINFO_TYPE (stmt_info
))
11322 case type_demotion_vec_info_type
:
11323 case type_promotion_vec_info_type
:
11324 case type_conversion_vec_info_type
:
11325 done
= vectorizable_conversion (vinfo
, stmt_info
,
11326 gsi
, &vec_stmt
, slp_node
, NULL
);
11330 case induc_vec_info_type
:
11331 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11332 stmt_info
, &vec_stmt
, slp_node
,
11337 case shift_vec_info_type
:
11338 done
= vectorizable_shift (vinfo
, stmt_info
,
11339 gsi
, &vec_stmt
, slp_node
, NULL
);
11343 case op_vec_info_type
:
11344 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11349 case assignment_vec_info_type
:
11350 done
= vectorizable_assignment (vinfo
, stmt_info
,
11351 gsi
, &vec_stmt
, slp_node
, NULL
);
11355 case load_vec_info_type
:
11356 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11361 case store_vec_info_type
:
11362 done
= vectorizable_store (vinfo
, stmt_info
,
11363 gsi
, &vec_stmt
, slp_node
, NULL
);
11365 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
11371 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11372 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11379 case condition_vec_info_type
:
11380 done
= vectorizable_condition (vinfo
, stmt_info
,
11381 gsi
, &vec_stmt
, slp_node
, NULL
);
11385 case comparison_vec_info_type
:
11386 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11391 case call_vec_info_type
:
11392 done
= vectorizable_call (vinfo
, stmt_info
,
11393 gsi
, &vec_stmt
, slp_node
, NULL
);
11396 case call_simd_clone_vec_info_type
:
11397 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11401 case reduc_vec_info_type
:
11402 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11403 gsi
, &vec_stmt
, slp_node
);
11407 case cycle_phi_info_type
:
11408 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11409 &vec_stmt
, slp_node
, slp_node_instance
);
11413 case lc_phi_info_type
:
11414 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11415 stmt_info
, &vec_stmt
, slp_node
);
11419 case recurr_info_type
:
11420 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11421 stmt_info
, &vec_stmt
, slp_node
, NULL
);
11425 case phi_info_type
:
11426 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11431 if (!STMT_VINFO_LIVE_P (stmt_info
))
11433 if (dump_enabled_p ())
11434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11435 "stmt not supported.\n");
11436 gcc_unreachable ();
11441 if (!slp_node
&& vec_stmt
)
11442 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11444 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11446 /* Handle stmts whose DEF is used outside the loop-nest that is
11447 being vectorized. */
11448 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11449 slp_node_instance
, true, NULL
);
11454 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */
11464 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11466 stmt_vec_info next_stmt_info
= first_stmt_info
;
11468 while (next_stmt_info
)
11470 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11471 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11472 /* Free the attached stmt_vec_info and remove the stmt. */
11473 vinfo
->remove_stmt (next_stmt_info
);
11474 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
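/* Worked example (illustrative only, the exact modes are target dependent):
   with a PREVAILING_MODE of V16QImode and SCALAR_TYPE of a 32-bit int,
   NUNITS of zero yields the related 4-element integer vector type
   (typically V4SImode), while an explicit NUNITS of 2 requests a
   2-element vector of that element type instead.  */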
11494 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11495 tree scalar_type
, poly_uint64 nunits
)
11497 tree orig_scalar_type
= scalar_type
;
11498 scalar_mode inner_mode
;
11499 machine_mode simd_mode
;
11502 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11503 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11506 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
  /* Interoperability between modes requires one to be a constant multiple
     of the other, so that the number of vectors required for each operation
     is a compile-time constant.  */
11511 if (prevailing_mode
!= VOIDmode
11512 && !constant_multiple_p (nunits
* nbytes
,
11513 GET_MODE_SIZE (prevailing_mode
))
11514 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
11524 if (INTEGRAL_TYPE_P (scalar_type
)
11525 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11526 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11527 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11528 TYPE_UNSIGNED (scalar_type
));
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
11534 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11535 && !INTEGRAL_TYPE_P (scalar_type
))
11536 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
11540 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11541 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11542 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;
  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise lookup a vector mode based on the prevailing mode.  */
11551 if (prevailing_mode
== VOIDmode
)
11553 gcc_assert (known_eq (nunits
, 0U));
11554 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11555 if (SCALAR_INT_MODE_P (simd_mode
))
      /* Traditional behavior is not to take the integer mode
	 literally, but simply to use it as a way of determining
	 the vector size.  It is up to mode_for_vector to decide
	 what the TYPE_MODE should be.

	 Note that nunits == 1 is allowed in order to support single
	 element vector types.  */
11564 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11565 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11569 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11570 || !related_vector_mode (prevailing_mode
,
11571 inner_mode
, nunits
).exists (&simd_mode
))
      /* Fall back to using mode_for_vector, mostly in the hope of being
	 able to use an integer mode.  */
11575 if (known_eq (nunits
, 0U)
11576 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11579 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11583 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
  /* In cases where the mode was chosen by mode_for_vector, check that
     the target actually supports the chosen mode, or that it at least
     allows the vector mode to be replaced by a like-sized integer.  */
11588 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11589 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
11594 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11595 return build_qualified_type
11596 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
11609 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11610 unsigned int group_size
)
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
11616 if (is_a
<bb_vec_info
> (vinfo
))
11617 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11621 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11623 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11624 vinfo
->vector_mode
= TYPE_MODE (vectype
);
  /* Register the natural choice of vector type, before the group size
     has been applied.  */
11629 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
11635 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11637 /* Start with the biggest number of units that fits within
11638 GROUP_SIZE and halve it until we find a valid vector type.
11639 Usually either the first attempt will succeed or all will
11640 fail (in the latter case because GROUP_SIZE is too small
11641 for the target), but it's possible that a target could have
11642 a hole between supported vector types.
11644 If GROUP_SIZE is not a power of 2, this has the effect of
11645 trying the largest power of 2 that fits within the group,
11646 even though the group is not a multiple of that vector size.
11647 The BB vectorizer will then try to carve up the group into
11649 unsigned int nunits
= 1 << floor_log2 (group_size
);
11652 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11653 scalar_type
, nunits
);
11656 while (nunits
> 1 && !vectype
);
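/* Illustrative note (a sketch, not part of the vectorizer proper): for a
   BB SLP group of six "int" lanes on a target whose natural vector holds
   eight ints, the natural choice above is too wide, so the loop retries
   with nunits = 1 << floor_log2 (6) == 4, then 2.  A hedged usage sketch,
   assuming VINFO is an already-built bb_vec_info:

     tree vectype = get_vectype_for_scalar_type (vinfo, integer_type_node, 6);

   would therefore normally yield a 4-element (or narrower) vector type.  */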
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
}
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_first_order_recurrence:
	  dump_printf (MSG_NOTE, "first order recurrence\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle
      || *dt == vect_first_order_recurrence)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	}
      else
	{
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
	  return true;
	}
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  if (gimple_assign_rhs_code (ass) == COND_EXPR
	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    {
	      if (operand < 2)
		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	      else
		*op = gimple_op (ass, operand);
	    }
	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	  else
	    *op = gimple_op (ass, operand + 1);
	}
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype)
      && SLP_TREE_DEF_TYPE (op) == vect_external_def)
    return false;
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }
  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else if (VECTOR_MODE_P (intermediate_mode))
	{
	  tree intermediate_element_type
	    = lang_hooks.types.type_for_mode (GET_MODE_INNER
					      (intermediate_mode),
					      TYPE_UNSIGNED (prev_type));
	  intermediate_type
	    = build_vector_type_for_mode (intermediate_element_type,
					  intermediate_mode);
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
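/* Hedged illustration (a sketch, not a caller from this file): widening a
   char vector all the way to int is the char->short->int case mentioned
   above.  On a target with the usual lo/hi unpack patterns one would
   expect something like

     enum tree_code code1, code2;
     int steps = 0;
     vec<tree> interm = vNULL;
     if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					 vectype_int, vectype_char,
					 &code1, &code2, &steps, &interm))
       /* steps == 1 and interm holds the intermediate short vector type.  */;

   where VINFO, STMT_INFO, VECTYPE_INT and VECTYPE_CHAR are assumed to be
   supplied by the caller, and the exact outcome is target-dependent.  */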
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && SCALAR_INT_MODE_P (prev_mode)
	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
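/* Hedged illustration (a sketch, not a caller from this file): narrowing
   int to char is the int->short->char case from the comment above.  A
   caller checking for it would look roughly like

     enum tree_code code1;
     int steps = 0;
     vec<tree> interm = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, vectype_char, vectype_int,
					  &code1, &steps, &interm))
       /* CODE1 is VEC_PACK_TRUNC_EXPR; with one intermediate short vector
	  type, steps == 1.  */;

   where VECTYPE_CHAR and VECTYPE_INT are assumed to be supplied by the
   caller and the result depends on the target's pack patterns.  */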
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
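/* Hedged usage sketch (illustrative only): when setting up a fully-masked
   loop, a controlling mask could be produced as

     gimple_seq seq = NULL;
     tree mask = vect_gen_while (&seq, mask_type, start_index, niters,
				 "loop_mask");

   leaving mask[I] true exactly while START_INDEX + I < NITERS.  MASK_TYPE,
   START_INDEX and NITERS are assumed to be trees computed elsewhere; the
   real callers live in the loop-control setup code rather than here.  */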
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
);