/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern.  don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
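
/* In short: process_use translates the relevance of the *using* stmt
   into a relevance for the *defining* stmt, adjusting it when the use
   crosses the inner/outer loop boundary (cases 3a/3b above), and then
   defers the actual marking and worklist push to vect_mark_relevant.  */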
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
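
/* The overall scheme above is a standard worklist fixpoint: seed the
   worklist with the stmts that vect_stmt_relevant_p identifies directly
   (stores, live defs, control stmts), then repeatedly pop a stmt and
   propagate its relevance to the definitions of its operands via
   process_use, which pushes newly-marked stmts back onto the worklist.  */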
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
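
/* As a rough worked example of the interleaving term above: for a
   contiguous-permute store group of GROUP_SIZE = 4 and NCOPIES = 1,
   ceil_log2 (4) == 2, so NSTMTS = 1 * 2 * 4 = 8 vec_perm operations are
   costed, matching a log2-depth interleave network over the vectors of
   the group.  */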
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}
void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
		  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
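/* A worked example (hypothetical numbers): if DR_STEP is 4, SCALE is 4 and
   the loop runs for at most 200 scalar iterations, then X / SCALE is 1 and
   the largest scaled offset needed is about 200, which fits in 8 bits, so
   an 8-bit offset type is sufficient.  */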
1881 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1882 loop_vec_info loop_vinfo
, bool masked_p
,
1883 gather_scatter_info
*gs_info
)
1885 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1886 data_reference
*dr
= dr_info
->dr
;
1887 tree step
= DR_STEP (dr
);
1888 if (TREE_CODE (step
) != INTEGER_CST
)
1890 /* ??? Perhaps we could use range information here? */
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_NOTE
, vect_location
,
1893 "cannot truncate variable step.\n");
1897 /* Get the number of bits in an element. */
1898 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1899 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1900 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
1904 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1906 /* Try lowering COUNT to the number of scalar latch iterations. */
1907 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1908 widest_int max_iters
;
1909 if (max_loop_iterations (loop
, &max_iters
)
1910 && max_iters
< count
)
1911 count
= max_iters
.to_shwi ();
1913 /* Try scales of 1 and the element size. */
1914 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1915 wi::overflow_type overflow
= wi::OVF_NONE
;
1916 for (int i
= 0; i
< 2; ++i
)
1918 int scale
= scales
[i
];
1920 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1923 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1924 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1927 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1928 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1930 /* Find the narrowest viable offset type. */
1931 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1932 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
1937 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1938 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1939 vectype
, memory_type
, offset_type
, scale
,
1940 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1941 || gs_info
->ifn
== IFN_LAST
)
1944 gs_info
->decl
= NULL_TREE
;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
1947 gs_info
->base
= NULL_TREE
;
1948 gs_info
->element_type
= TREE_TYPE (vectype
);
1949 gs_info
->offset
= fold_convert (offset_type
, step
);
1950 gs_info
->offset_dt
= vect_constant_def
;
1951 gs_info
->scale
= scale
;
1952 gs_info
->memory_type
= memory_type
;
1956 if (overflow
&& dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE
, vect_location
,
1958 "truncating gather/scatter offset to %d bits"
1959 " might change its value.\n", element_bits
);
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
1971 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1972 loop_vec_info loop_vinfo
, bool masked_p
,
1973 gather_scatter_info
*gs_info
)
1975 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1976 || gs_info
->ifn
== IFN_LAST
)
1977 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1980 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1981 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1983 gcc_assert (TYPE_PRECISION (new_offset_type
)
1984 >= TYPE_PRECISION (old_offset_type
));
1985 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE
, vect_location
,
1989 "using gather/scatter for strided/grouped access,"
1990 " scale = %d\n", gs_info
->scale
);
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
2000 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
2002 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2003 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
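/* For instance, for V4SI (four elements) the reversing permutation is
   { 3, 2, 1, 0 }; the builder below encodes it with the single stepped
   pattern nunits - 1 - i, i.e. { 3, 2, 1, ... }.  */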
2011 perm_mask_for_reverse (tree vectype
)
2013 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2015 /* The encoding has a single stepped pattern. */
2016 vec_perm_builder
sel (nunits
, 1, 3);
2017 for (int i
= 0; i
< 3; ++i
)
2018 sel
.quick_push (nunits
- 1 - i
);
2020 vec_perm_indices
indices (sel
, 1, nunits
);
2021 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), TYPE_MODE (vectype
),
2024 return vect_gen_perm_mask_checked (vectype
, indices
);
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */
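/* For example (purely illustrative): for a V4SI access with a negative step,
   the first vector access starts (4 - 1) * 4 = 12 bytes before the DR, so
   *POFFSET is set to -12 below.  */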
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info
*vinfo
,
2034 stmt_vec_info stmt_info
, tree vectype
,
2035 vec_load_store_type vls_type
,
2036 unsigned int ncopies
, poly_int64
*poffset
)
2038 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2039 dr_alignment_support alignment_support_scheme
;
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE
;
  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
2051 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2054 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2057 if (alignment_support_scheme
!= dr_aligned
2058 && alignment_support_scheme
!= dr_unaligned_supported
)
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2062 "negative step but alignment required.\n");
2064 return VMAT_ELEMENTWISE
;
2067 if (vls_type
== VLS_STORE_INVARIANT
)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE
, vect_location
,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN
;
2076 if (!perm_mask_for_reverse (vectype
))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2080 "negative step and reversing not supported.\n");
2082 return VMAT_ELEMENTWISE
;
2085 return VMAT_CONTIGUOUS_REVERSE
;
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */
2092 vect_get_store_rhs (stmt_vec_info stmt_info
)
2094 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2096 gcc_assert (gimple_assign_single_p (assign
));
2097 return gimple_assign_rhs1 (assign
);
2099 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2101 internal_fn ifn
= gimple_call_internal_fn (call
);
2102 int index
= internal_fn_stored_value_index (ifn
);
2103 gcc_assert (index
>= 0);
2104 return gimple_call_arg (call
, index
);
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   with the same vector size as the returned vector.  It first checks
   whether the target supports a vector mode of the piece size for the
   construction; if not, it then checks whether a scalar mode of the piece
   size can be used instead.  It returns NULL_TREE if no suitable
   composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
2124 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2126 gcc_assert (VECTOR_TYPE_P (vtype
));
2127 gcc_assert (known_gt (nelts
, 0U));
2129 machine_mode vmode
= TYPE_MODE (vtype
);
2130 if (!VECTOR_MODE_P (vmode
))
2133 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2134 unsigned int pbsize
;
2135 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
      /* First check if vec_init optab supports construction from
	 vector pieces directly.  */
2139 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2140 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2142 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2143 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2144 != CODE_FOR_nothing
))
2146 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
      /* Otherwise check whether an integer type of the same piece size
	 exists and whether the vec_init optab supports construction from
	 it directly.  */
2152 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2153 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2154 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2155 != CODE_FOR_nothing
))
2157 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2158 return build_vector_type (*ptype
, nelts
);
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
2175 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2176 tree vectype
, slp_tree slp_node
,
2177 bool masked_p
, vec_load_store_type vls_type
,
2178 vect_memory_access_type
*memory_access_type
,
2179 poly_int64
*poffset
,
2180 dr_alignment_support
*alignment_support_scheme
,
2182 gather_scatter_info
*gs_info
)
2184 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2185 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2186 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2187 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2188 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2189 bool single_element_p
= (stmt_info
== first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2191 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2192 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
2200 bool can_overrun_p
= (!masked_p
2201 && vls_type
== VLS_LOAD
  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
	  /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
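	  /* E.g. (illustrative): with NUNITS = 8 and GROUP_SIZE = 4 each
	     vector can be filled from whole group-sized chunks, so
	     VMAT_STRIDED_SLP is used; with GROUP_SIZE = 3 the chunks do not
	     divide the vector evenly and we fall back to VMAT_ELEMENTWISE.  */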
2224 if (multiple_p (nunits
, group_size
))
2225 *memory_access_type
= VMAT_STRIDED_SLP
;
2227 *memory_access_type
= VMAT_ELEMENTWISE
;
2231 overrun_p
= loop_vinfo
&& gap
!= 0;
2232 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
2244 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2246 / vect_get_scalar_dr_size (first_dr_info
)))
	      /* If the gap splits the vector in half and the target
		 can do half-vector operations avoid the epilogue peeling
		 by simply loading half of the vector only.  Usually
		 the construction with an upper zero half will be elided.  */
2253 dr_alignment_support alss
;
2254 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2258 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2259 vectype
, misalign
)))
2261 || alss
== dr_unaligned_supported
)
2262 && known_eq (nunits
, (group_size
- gap
) * 2)
2263 && known_eq (nunits
, group_size
)
2264 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2268 if (overrun_p
&& !can_overrun_p
)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "Peeling for outer loop is not supported\n");
2275 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2278 if (single_element_p
)
	    /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
	       only correct for single element "interleaving" SLP.  */
2281 *memory_access_type
= get_negative_load_store_type
2282 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
	    /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
	       separated by the stride, until we have a complete vector.
	       Fall back to scalar accesses if that isn't possible.  */
2288 if (multiple_p (nunits
, group_size
))
2289 *memory_access_type
= VMAT_STRIDED_SLP
;
2291 *memory_access_type
= VMAT_ELEMENTWISE
;
2296 gcc_assert (!loop_vinfo
|| cmp
> 0);
2297 *memory_access_type
= VMAT_CONTIGUOUS
;
      /* When we have a contiguous access across loop iterations
	 but the access in the loop doesn't cover the full vector
	 we can end up with no gap recorded but still excess
	 elements accessed, see PR103116.  Make sure we peel for
	 gaps if necessary and sufficient and give up if not.  */
2306 && *memory_access_type
== VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2308 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2311 unsigned HOST_WIDE_INT cnunits
, cvf
;
2313 || !nunits
.is_constant (&cnunits
)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
	      /* Peeling for gaps assumes that a single scalar iteration
		 is enough to make sure the last vector iteration doesn't
		 access excess elements.
		 ??? Enhancements include peeling multiple iterations
		 or using masked loads with a static mask.  */
2320 || (group_size
* cvf
) % cnunits
+ group_size
< cnunits
)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2324 "peeling for gaps insufficient for "
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
2336 *memory_access_type
= VMAT_ELEMENTWISE
;
      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
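      /* E.g. (illustrative): with a gap of 1 element, 4-byte elements and a
	 known alignment of 16 bytes, 1 < 16 / 4, so the overrun stays within
	 the aligned block and WOULD_OVERRUN_P is cleared below.  */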
2347 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2348 / vect_get_scalar_dr_size (first_dr_info
)))
2349 would_overrun_p
= false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2352 && (can_overrun_p
|| !would_overrun_p
)
2353 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
	  /* First cope with the degenerate case of a single-element
	     vector.  */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type
== VLS_LOAD
2362 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2363 : vect_store_lanes_supported (vectype
, group_size
,
2366 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2367 overrun_p
= would_overrun_p
;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type
== VLS_LOAD
2372 ? vect_grouped_load_supported (vectype
, single_element_p
,
2374 : vect_grouped_store_supported (vectype
, group_size
))
2376 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2377 overrun_p
= would_overrun_p
;
  /* As a last resort, try using a gather load or scatter store.

     ??? Although the code can handle all group sizes correctly,
     it probably isn't a win to use separate strided accesses based
     on nearby locations.  Or, even if it's a win over scalar code,
     it might not be a win over vectorizing at a lower VF, if that
     allows us to use contiguous accesses.  */
2388 if (*memory_access_type
== VMAT_ELEMENTWISE
2391 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2393 *memory_access_type
= VMAT_GATHER_SCATTER
;
2396 if (*memory_access_type
== VMAT_GATHER_SCATTER
2397 || *memory_access_type
== VMAT_ELEMENTWISE
)
2399 *alignment_support_scheme
= dr_unaligned_supported
;
2400 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2404 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2410 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
2414 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2415 while (next_stmt_info
)
2417 tree op
= vect_get_store_rhs (next_stmt_info
);
2418 enum vect_def_type dt
;
2419 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2423 "use not simple.\n");
2426 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2432 gcc_assert (can_overrun_p
);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2435 "Data access with gaps requires scalar "
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
2458 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2459 tree vectype
, slp_tree slp_node
,
2460 bool masked_p
, vec_load_store_type vls_type
,
2461 unsigned int ncopies
,
2462 vect_memory_access_type
*memory_access_type
,
2463 poly_int64
*poffset
,
2464 dr_alignment_support
*alignment_support_scheme
,
2466 gather_scatter_info
*gs_info
)
2468 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2469 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2470 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2474 *memory_access_type
= VMAT_GATHER_SCATTER
;
2475 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
      /* When using internal functions, we rely on pattern recognition
	 to convert the type of the offset to the type that the target
	 requires, with the result being a call to an internal function.
	 If that failed for some reason (e.g. because another pattern
	 took priority), just handle cases in which the offset already
	 has the right type.  */
2483 else if (gs_info
->ifn
!= IFN_LAST
2484 && !is_gimple_call (stmt_info
->stmt
)
2485 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2486 TREE_TYPE (gs_info
->offset_vectype
)))
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2490 "%s offset requires a conversion\n",
2491 vls_type
== VLS_LOAD
? "gather" : "scatter");
2494 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2495 &gs_info
->offset_dt
,
2496 &gs_info
->offset_vectype
))
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2500 "%s index use not simple.\n",
2501 vls_type
== VLS_LOAD
? "gather" : "scatter");
2504 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2506 if (vls_type
!= VLS_LOAD
)
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2510 "unsupported emulated scatter.\n");
2513 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2514 || !TYPE_VECTOR_SUBPARTS
2515 (gs_info
->offset_vectype
).is_constant ()
2516 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2517 (gs_info
->offset_vectype
),
2518 TYPE_VECTOR_SUBPARTS (vectype
)))
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2522 "unsupported vector types for emulated "
      /* Gather-scatter accesses perform only component accesses, alignment
	 is irrelevant for them.  */
      *alignment_support_scheme = dr_unaligned_supported;
2531 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2533 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2535 vls_type
, memory_access_type
, poffset
,
2536 alignment_support_scheme
,
2537 misalignment
, gs_info
))
2540 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2542 gcc_assert (!slp_node
);
2544 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2546 *memory_access_type
= VMAT_GATHER_SCATTER
;
2548 *memory_access_type
= VMAT_ELEMENTWISE
;
2549 /* Alignment is irrelevant here. */
2550 *alignment_support_scheme
= dr_unaligned_supported
;
2554 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2557 gcc_assert (vls_type
== VLS_LOAD
);
2558 *memory_access_type
= VMAT_INVARIANT
;
2559 /* Invariant accesses perform only component accesses, alignment
2560 is irrelevant for them. */
2561 *alignment_support_scheme
= dr_unaligned_supported
;
2566 *memory_access_type
= get_negative_load_store_type
2567 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2569 *memory_access_type
= VMAT_CONTIGUOUS
;
2570 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2572 *alignment_support_scheme
2573 = vect_supportable_dr_alignment (vinfo
,
2574 STMT_VINFO_DR_INFO (stmt_info
),
2575 vectype
, *misalignment
);
2579 if ((*memory_access_type
== VMAT_ELEMENTWISE
2580 || *memory_access_type
== VMAT_STRIDED_SLP
)
2581 && !nunits
.is_constant ())
2583 if (dump_enabled_p ())
2584 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2585 "Not using elementwise accesses due to variable "
2586 "vectorization factor.\n");
2590 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2594 "unsupported unaligned access\n");
  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
2601 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2602 if (!first_stmt_info
)
2603 first_stmt_info
= stmt_info
;
2604 if (*memory_access_type
== VMAT_ELEMENTWISE
2605 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2606 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2607 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2608 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2612 "not falling back to elementwise accesses\n");
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2625 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2626 slp_tree slp_node
, unsigned mask_index
,
2627 tree
*mask
, slp_tree
*mask_node
,
2628 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2630 enum vect_def_type mask_dt
;
2632 slp_tree mask_node_1
;
2633 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2634 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2636 if (dump_enabled_p ())
2637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2638 "mask use not simple.\n");
2642 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2646 "mask argument is not a boolean.\n");
  /* If the caller is not prepared for adjusting an external/constant
     SLP mask vector type, fail.  */
2654 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2656 if (dump_enabled_p ())
2657 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2658 "SLP mask argument is not vectorized.\n");
2662 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2664 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2666 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2668 if (dump_enabled_p ())
2669 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2670 "could not find an appropriate vector mask type.\n");
2674 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2675 TYPE_VECTOR_SUBPARTS (vectype
)))
2677 if (dump_enabled_p ())
2678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2679 "vector mask type %T"
2680 " does not match vector data type %T.\n",
2681 mask_vectype
, vectype
);
2686 *mask_dt_out
= mask_dt
;
2687 *mask_vectype_out
= mask_vectype
;
2689 *mask_node
= mask_node_1
;
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2699 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2700 slp_tree slp_node
, tree rhs
,
2701 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2702 vec_load_store_type
*vls_type_out
)
  /* If this is a store from a constant, make sure native_encode_expr
     can handle it.  */
2706 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2710 "cannot encode constant as a byte sequence.\n");
2715 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2717 if (gimple_call_internal_p (call
)
2718 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2719 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2722 enum vect_def_type rhs_dt
;
2725 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2726 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2728 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2730 "use not simple.\n");
2734 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2735 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2737 if (dump_enabled_p ())
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2739 "incompatible vector types.\n");
2743 *rhs_dt_out
= rhs_dt
;
2744 *rhs_vectype_out
= rhs_vectype
;
2745 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2746 *vls_type_out
= VLS_STORE_INVARIANT
;
2748 *vls_type_out
= VLS_STORE
;
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */
2757 vect_build_all_ones_mask (vec_info
*vinfo
,
2758 stmt_vec_info stmt_info
, tree masktype
)
2760 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2761 return build_int_cst (masktype
, -1);
2762 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2764 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2765 mask
= build_vector_from_val (masktype
, mask
);
2766 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2768 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2772 for (int j
= 0; j
< 6; ++j
)
2774 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2775 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2776 mask
= build_vector_from_val (masktype
, mask
);
2777 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */
2786 vect_build_zero_merge_argument (vec_info
*vinfo
,
2787 stmt_vec_info stmt_info
, tree vectype
)
2790 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2791 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2792 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2796 for (int j
= 0; j
< 6; ++j
)
2798 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2799 merge
= build_real (TREE_TYPE (vectype
), r
);
2803 merge
= build_vector_from_val (vectype
, merge
);
2804 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2814 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2815 gimple_stmt_iterator
*gsi
,
2817 gather_scatter_info
*gs_info
,
2820 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2821 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2822 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2823 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2824 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2825 edge pe
= loop_preheader_edge (loop
);
2826 enum { NARROW
, NONE
, WIDEN
} modifier
;
2827 poly_uint64 gather_off_nunits
2828 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2830 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2831 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2832 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2833 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2834 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2835 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2836 tree scaletype
= TREE_VALUE (arglist
);
2837 tree real_masktype
= masktype
;
2838 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2840 || TREE_CODE (masktype
) == INTEGER_TYPE
2841 || types_compatible_p (srctype
, masktype
)));
2843 masktype
= truth_type_for (srctype
);
2845 tree mask_halftype
= masktype
;
2846 tree perm_mask
= NULL_TREE
;
2847 tree mask_perm_mask
= NULL_TREE
;
2848 if (known_eq (nunits
, gather_off_nunits
))
2850 else if (known_eq (nunits
* 2, gather_off_nunits
))
      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
2856 int count
= gather_off_nunits
.to_constant ();
2857 vec_perm_builder
sel (count
, count
, 1);
2858 for (int i
= 0; i
< count
; ++i
)
2859 sel
.quick_push (i
| (count
/ 2));
2861 vec_perm_indices
indices (sel
, 1, count
);
2862 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2865 else if (known_eq (nunits
, gather_off_nunits
* 2))
      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
2871 int count
= nunits
.to_constant ();
2872 vec_perm_builder
sel (count
, count
, 1);
2873 sel
.quick_grow (count
);
2874 for (int i
= 0; i
< count
; ++i
)
2875 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2876 vec_perm_indices
indices (sel
, 2, count
);
2877 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2881 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2883 for (int i
= 0; i
< count
; ++i
)
2884 sel
[i
] = i
| (count
/ 2);
2885 indices
.new_vector (sel
, 2, count
);
2886 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2889 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2894 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2895 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2897 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2898 if (!is_gimple_min_invariant (ptr
))
2901 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2902 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2903 gcc_assert (!new_bb
);
2906 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2908 tree vec_oprnd0
= NULL_TREE
;
2909 tree vec_mask
= NULL_TREE
;
2910 tree src_op
= NULL_TREE
;
2911 tree mask_op
= NULL_TREE
;
2912 tree prev_res
= NULL_TREE
;
2916 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2917 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2920 auto_vec
<tree
> vec_oprnds0
;
2921 auto_vec
<tree
> vec_masks
;
2922 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2923 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2924 gs_info
->offset
, &vec_oprnds0
);
2926 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2927 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2928 mask
, &vec_masks
, masktype
);
2929 for (int j
= 0; j
< ncopies
; ++j
)
2932 if (modifier
== WIDEN
&& (j
& 1))
2933 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2934 perm_mask
, stmt_info
, gsi
);
2936 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2938 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2940 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2941 TYPE_VECTOR_SUBPARTS (idxtype
)));
2942 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2943 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2944 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2945 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2951 if (mask_perm_mask
&& (j
& 1))
2952 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2953 mask_perm_mask
, stmt_info
, gsi
);
2956 if (modifier
== NARROW
)
2959 vec_mask
= vec_masks
[j
/ 2];
2962 vec_mask
= vec_masks
[j
];
2965 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2967 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2968 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2969 gcc_assert (known_eq (sub1
, sub2
));
2970 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2971 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2973 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2974 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2978 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2980 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2982 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2983 : VEC_UNPACK_LO_EXPR
,
2985 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2991 tree mask_arg
= mask_op
;
2992 if (masktype
!= real_masktype
)
2994 tree utype
, optype
= TREE_TYPE (mask_op
);
2995 if (VECTOR_TYPE_P (real_masktype
)
2996 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2997 utype
= real_masktype
;
2999 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
3000 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
3001 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
3003 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
3004 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3006 if (!useless_type_conversion_p (real_masktype
, utype
))
3008 gcc_assert (TYPE_PRECISION (utype
)
3009 <= TYPE_PRECISION (real_masktype
));
3010 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
3011 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
3012 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3015 src_op
= build_zero_cst (srctype
);
3017 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
3020 if (!useless_type_conversion_p (vectype
, rettype
))
3022 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
3023 TYPE_VECTOR_SUBPARTS (rettype
)));
3024 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
3025 gimple_call_set_lhs (new_stmt
, op
);
3026 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3027 var
= make_ssa_name (vec_dest
);
3028 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
3029 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
3030 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3034 var
= make_ssa_name (vec_dest
, new_stmt
);
3035 gimple_call_set_lhs (new_stmt
, var
);
3036 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3039 if (modifier
== NARROW
)
3046 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3048 new_stmt
= SSA_NAME_DEF_STMT (var
);
3051 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3053 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
3063 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
3064 class loop
*loop
, stmt_vec_info stmt_info
,
3065 slp_tree slp_node
, gather_scatter_info
*gs_info
,
3066 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
3068 gimple_seq stmts
= NULL
;
3069 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3073 edge pe
= loop_preheader_edge (loop
);
3074 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3075 gcc_assert (!new_bb
);
3078 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
3082 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
3083 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
3084 gs_info
->offset
, vec_offset
,
3085 gs_info
->offset_vectype
);
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
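/* As a hypothetical example: for a V4SI access with DR_STEP = 8 bytes and
   SCALE = 4, X = DR_STEP / SCALE = 2, so *VEC_OFFSET becomes
   { 0, 2, 4, 6 } and *DATAREF_BUMP is DR_STEP * 4 = 32 bytes per copy.  */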
3099 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3100 loop_vec_info loop_vinfo
,
3101 gather_scatter_info
*gs_info
,
3102 tree
*dataref_bump
, tree
*vec_offset
)
3104 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3105 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3107 tree bump
= size_binop (MULT_EXPR
,
3108 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3109 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3110 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3112 /* The offset given in GS_INFO can have pointer type, so use the element
3113 type of the vector instead. */
3114 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3116 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3117 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3118 ssize_int (gs_info
->scale
));
3119 step
= fold_convert (offset_type
, step
);
3121 /* Create {0, X, X*2, X*3, ...}. */
3122 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3123 build_zero_cst (offset_type
), step
);
3124 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
3133 vect_get_data_ptr_increment (vec_info
*vinfo
,
3134 dr_vec_info
*dr_info
, tree aggr_type
,
3135 vect_memory_access_type memory_access_type
)
3137 if (memory_access_type
== VMAT_INVARIANT
)
3138 return size_zero_node
;
3140 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3141 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3142 if (tree_int_cst_sgn (step
) == -1)
3143 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
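/* For example (illustrative only): vectorizing BUILT_IN_BSWAP32 on V4SI uses
   char_vectype V16QI with word_bytes 4, giving the byte permutation
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. each
   4-byte word is reversed in place by a single VEC_PERM_EXPR.  */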
3150 vectorizable_bswap (vec_info
*vinfo
,
3151 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3152 gimple
**vec_stmt
, slp_tree slp_node
,
3154 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3157 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3158 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3161 op
= gimple_call_arg (stmt
, 0);
3162 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3163 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3171 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3173 gcc_assert (ncopies
>= 1);
3175 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3179 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3180 unsigned word_bytes
;
3181 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
  /* The encoding uses one stepped pattern for each byte in the word.  */
3185 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3186 for (unsigned i
= 0; i
< 3; ++i
)
3187 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3188 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3190 vec_perm_indices
indices (elts
, 1, num_bytes
);
3191 machine_mode vmode
= TYPE_MODE (char_vectype
);
3192 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3198 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3202 "incompatible vector types for invariants\n");
3206 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3207 DUMP_VECT_SCOPE ("vectorizable_bswap");
3208 record_stmt_cost (cost_vec
,
3209 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3210 record_stmt_cost (cost_vec
,
3212 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3213 vec_perm
, stmt_info
, 0, vect_body
);
3217 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3220 vec
<tree
> vec_oprnds
= vNULL
;
3221 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
  /* Arguments are ready.  Create the new vector stmt.  */
3226 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3229 tree tem
= make_ssa_name (char_vectype
);
3230 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3231 char_vectype
, vop
));
3232 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3233 tree tem2
= make_ssa_name (char_vectype
);
3234 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3235 tem
, tem
, bswap_vconst
);
3236 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3237 tem
= make_ssa_name (vectype
);
3238 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3240 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3242 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3244 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3248 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3250 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
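/* A typical case (illustrative): VECTYPE_IN V2DI and VECTYPE_OUT V4SI can be
   narrowed in one step, with *CONVERT_CODE set to the vector pack operation
   that combines two V2DI results into one V4SI result.  */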
3260 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3261 tree_code
*convert_code
)
3263 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3264 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3268 int multi_step_cvt
= 0;
3269 auto_vec
<tree
, 8> interm_types
;
3270 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3271 &code
, &multi_step_cvt
, &interm_types
)
3275 *convert_code
= code
;
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3287 vectorizable_call (vec_info
*vinfo
,
3288 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3289 gimple
**vec_stmt
, slp_tree slp_node
,
3290 stmt_vector_for_cost
*cost_vec
)
3296 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3297 tree vectype_out
, vectype_in
;
3298 poly_uint64 nunits_in
;
3299 poly_uint64 nunits_out
;
3300 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3301 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3302 tree fndecl
, new_temp
, rhs_type
;
3303 enum vect_def_type dt
[4]
3304 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3305 vect_unknown_def_type
};
3306 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3307 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3308 int ndts
= ARRAY_SIZE (dt
);
3310 auto_vec
<tree
, 8> vargs
;
3311 enum { NARROW
, NONE
, WIDEN
} modifier
;
3315 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3318 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3322 /* Is STMT_INFO a vectorizable call? */
3323 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3327 if (gimple_call_internal_p (stmt
)
3328 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3329 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3330 /* Handled by vectorizable_load and vectorizable_store. */
3333 if (gimple_call_lhs (stmt
) == NULL_TREE
3334 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3337 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3339 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3341 /* Process function arguments. */
3342 rhs_type
= NULL_TREE
;
3343 vectype_in
= NULL_TREE
;
3344 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is also not good.  */
3349 if (nargs
== 0 || nargs
> 4)
3352 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3353 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3354 if (cfn
== CFN_GOMP_SIMD_LANE
)
3357 rhs_type
= unsigned_type_node
;
3361 if (internal_fn_p (cfn
))
3362 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3364 for (i
= 0; i
< nargs
; i
++)
3366 if ((int) i
== mask_opno
)
3368 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3369 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3374 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3375 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3377 if (dump_enabled_p ())
3378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3379 "use not simple.\n");
3383 /* We can only handle calls with arguments of the same type. */
3385 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3389 "argument types differ.\n");
3393 rhs_type
= TREE_TYPE (op
);
3396 vectype_in
= vectypes
[i
];
3397 else if (vectypes
[i
]
3398 && !types_compatible_p (vectypes
[i
], vectype_in
))
3400 if (dump_enabled_p ())
3401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3402 "argument vector types differ.\n");
3406 /* If all arguments are external or constant defs, infer the vector type
3407 from the scalar type. */
3409 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3411 gcc_assert (vectype_in
);
3414 if (dump_enabled_p ())
3415 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3416 "no vectype for scalar type %T\n", rhs_type
);
  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
     by a pack of the two vectors into an SI vector.  We would need
     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3425 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3429 "mismatched vector sizes %T and %T\n",
3430 vectype_in
, vectype_out
);
3434 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3435 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3437 if (dump_enabled_p ())
3438 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3439 "mixed mask and nonmask vector types\n");
3443 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3445 if (dump_enabled_p ())
3446 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3447 "use emulated vector type for call\n");
3452 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3453 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3454 if (known_eq (nunits_in
* 2, nunits_out
))
3456 else if (known_eq (nunits_out
, nunits_in
))
3458 else if (known_eq (nunits_out
* 2, nunits_in
))
3463 /* We only handle functions that do not read or clobber memory. */
3464 if (gimple_vuse (stmt
))
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3468 "function reads from or writes to memory.\n");
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
3477 internal_fn ifn
= IFN_LAST
;
3478 tree callee
= gimple_call_fndecl (stmt
);
3480 /* First try using an internal function. */
3481 tree_code convert_code
= ERROR_MARK
;
3483 && (modifier
== NONE
3484 || (modifier
== NARROW
3485 && simple_integer_narrowing (vectype_out
, vectype_in
,
3487 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3490 /* If that fails, try asking for a target-specific built-in function. */
3491 if (ifn
== IFN_LAST
)
3493 if (cfn
!= CFN_LAST
)
3494 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3495 (cfn
, vectype_out
, vectype_in
);
3496 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3497 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3498 (callee
, vectype_out
, vectype_in
);
3501 if (ifn
== IFN_LAST
&& !fndecl
)
3503 if (cfn
== CFN_GOMP_SIMD_LANE
3506 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3507 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3508 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3509 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3511 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3512 { 0, 1, 2, ... vf - 1 } vector. */
3513 gcc_assert (nargs
== 0);
3515 else if (modifier
== NONE
3516 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3517 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3518 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3519 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3520 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3521 slp_op
, vectype_in
, cost_vec
);
3524 if (dump_enabled_p ())
3525 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3526 "function is not vectorizable.\n");
3533 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3534 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3536 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3538 /* Sanity check: make sure that at least one copy of the vectorized stmt
3539 needs to be generated. */
3540 gcc_assert (ncopies
>= 1);
3542 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3543 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3544 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3545 if (!vec_stmt
) /* transformation not required. */
3548 for (i
= 0; i
< nargs
; ++i
)
3549 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3551 ? vectypes
[i
] : vectype_in
))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3555 "incompatible vector types for invariants\n");
3558 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3559 DUMP_VECT_SCOPE ("vectorizable_call");
3560 vect_model_simple_cost (vinfo
, stmt_info
,
3561 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3562 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3563 record_stmt_cost (cost_vec
, ncopies
/ 2,
3564 vec_promote_demote
, stmt_info
, 0, vect_body
);
3567 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3568 && (reduc_idx
>= 0 || mask_opno
>= 0))
3571 && (cond_fn
== IFN_LAST
3572 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3573 OPTIMIZE_FOR_SPEED
)))
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3577 "can't use a fully-masked loop because no"
3578 " conditional operation is available.\n");
3579 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3583 unsigned int nvectors
3585 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3587 tree scalar_mask
= NULL_TREE
;
3589 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3590 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3591 vectype_out
, scalar_mask
);
3599 if (dump_enabled_p ())
3600 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3603 scalar_dest
= gimple_call_lhs (stmt
);
3604 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3606 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3607 unsigned int vect_nargs
= nargs
;
3608 if (masked_loop_p
&& reduc_idx
>= 0)
3614 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3616 tree prev_res
= NULL_TREE
;
3617 vargs
.safe_grow (vect_nargs
, true);
3618 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3619 for (j
= 0; j
< ncopies
; ++j
)
3621 /* Build argument list for the vectorized call. */
3624 vec
<tree
> vec_oprnds0
;
3626 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3627 vec_oprnds0
= vec_defs
[0];
3629 /* Arguments are ready. Create the new vector stmt. */
3630 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3633 if (masked_loop_p
&& reduc_idx
>= 0)
3635 unsigned int vec_num
= vec_oprnds0
.length ();
3636 /* Always true for SLP. */
3637 gcc_assert (ncopies
== 1);
3638 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3642 for (k
= 0; k
< nargs
; k
++)
3644 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3645 vargs
[varg
++] = vec_oprndsk
[i
];
3647 if (masked_loop_p
&& reduc_idx
>= 0)
3648 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3650 if (modifier
== NARROW
)
3652 /* We don't define any narrowing conditional functions
3654 gcc_assert (mask_opno
< 0);
3655 tree half_res
= make_ssa_name (vectype_in
);
3657 = gimple_build_call_internal_vec (ifn
, vargs
);
3658 gimple_call_set_lhs (call
, half_res
);
3659 gimple_call_set_nothrow (call
, true);
3660 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3663 prev_res
= half_res
;
3666 new_temp
= make_ssa_name (vec_dest
);
3667 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3668 prev_res
, half_res
);
3669 vect_finish_stmt_generation (vinfo
, stmt_info
,
3674 if (mask_opno
>= 0 && masked_loop_p
)
3676 unsigned int vec_num
= vec_oprnds0
.length ();
3677 /* Always true for SLP. */
3678 gcc_assert (ncopies
== 1);
3679 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3681 vargs
[mask_opno
] = prepare_vec_mask
3682 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3683 vargs
[mask_opno
], gsi
);
3687 if (ifn
!= IFN_LAST
)
3688 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3690 call
= gimple_build_call_vec (fndecl
, vargs
);
3691 new_temp
= make_ssa_name (vec_dest
, call
);
3692 gimple_call_set_lhs (call
, new_temp
);
3693 gimple_call_set_nothrow (call
, true);
3694 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3697 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3703 if (masked_loop_p
&& reduc_idx
>= 0)
3704 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3706 for (i
= 0; i
< nargs
; i
++)
3708 op
= gimple_call_arg (stmt
, i
);
3711 vec_defs
.quick_push (vNULL
);
3712 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3716 vargs
[varg
++] = vec_defs
[i
][j
];
3718 if (masked_loop_p
&& reduc_idx
>= 0)
3719 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3721 if (mask_opno
>= 0 && masked_loop_p
)
3723 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3726 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3727 vargs
[mask_opno
], gsi
);
3731 if (cfn
== CFN_GOMP_SIMD_LANE
)
3733 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3735 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3736 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3737 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3738 new_temp
= make_ssa_name (vec_dest
);
3739 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3740 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3742 else if (modifier
== NARROW
)
3744 /* We don't define any narrowing conditional functions at
3746 gcc_assert (mask_opno
< 0);
3747 tree half_res
= make_ssa_name (vectype_in
);
3748 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3749 gimple_call_set_lhs (call
, half_res
);
3750 gimple_call_set_nothrow (call
, true);
3751 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3754 prev_res
= half_res
;
3757 new_temp
= make_ssa_name (vec_dest
);
3758 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3759 prev_res
, half_res
);
3760 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3765 if (ifn
!= IFN_LAST
)
3766 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3768 call
= gimple_build_call_vec (fndecl
, vargs
);
3769 new_temp
= make_ssa_name (vec_dest
, call
);
3770 gimple_call_set_lhs (call
, new_temp
);
3771 gimple_call_set_nothrow (call
, true);
3772 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3776 if (j
== (modifier
== NARROW
? 1 : 0))
3777 *vec_stmt
= new_stmt
;
3778 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3780 for (i
= 0; i
< nargs
; i
++)
3782 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3783 vec_oprndsi
.release ();
3786 else if (modifier
== NARROW
)
3788 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3789 /* We don't define any narrowing conditional functions at present. */
3790 gcc_assert (mask_opno
< 0);
3791 for (j
= 0; j
< ncopies
; ++j
)
3793 /* Build argument list for the vectorized call. */
3795 vargs
.create (nargs
* 2);
3801 vec
<tree
> vec_oprnds0
;
3803 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3804 vec_oprnds0
= vec_defs
[0];
3806 /* Arguments are ready. Create the new vector stmt. */
3807 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3811 for (k
= 0; k
< nargs
; k
++)
3813 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3814 vargs
.quick_push (vec_oprndsk
[i
]);
3815 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3818 if (ifn
!= IFN_LAST
)
3819 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3821 call
= gimple_build_call_vec (fndecl
, vargs
);
3822 new_temp
= make_ssa_name (vec_dest
, call
);
3823 gimple_call_set_lhs (call
, new_temp
);
3824 gimple_call_set_nothrow (call
, true);
3825 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3826 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3831 for (i
= 0; i
< nargs
; i
++)
3833 op
= gimple_call_arg (stmt
, i
);
3836 vec_defs
.quick_push (vNULL
);
3837 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3838 op
, &vec_defs
[i
], vectypes
[i
]);
3840 vec_oprnd0
= vec_defs
[i
][2*j
];
3841 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3843 vargs
.quick_push (vec_oprnd0
);
3844 vargs
.quick_push (vec_oprnd1
);
3847 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3848 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3849 gimple_call_set_lhs (new_stmt
, new_temp
);
3850 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3852 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3856 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3858 for (i
= 0; i
< nargs
; i
++)
3860 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3861 vec_oprndsi
.release ();
3865 /* No current target implements this case. */
3870 /* The call in STMT might prevent it from being removed in dce.
3871 We however cannot remove it here, due to the way the ssa name
3872 it defines is mapped to the new definition. So just replace
3873 rhs of the statement with something harmless. */
3878 stmt_info
= vect_orig_stmt (stmt_info
);
3879 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3882 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3883 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
      else
	return;
    }
}
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
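/* The .to_constant () call above is only reached for fixed-length vector
   types: simd clone vectorization is rejected earlier when the
   vectorization factor is not a compile-time constant (see the
   !vf.is_constant () bail-out in vectorizable_simd_clone_call), so
   variable-width vectors never arrive here.  */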
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3986 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3987 gimple_stmt_iterator
*gsi
,
3988 gimple
**vec_stmt
, slp_tree slp_node
,
3989 stmt_vector_for_cost
*)
3994 tree vec_oprnd0
= NULL_TREE
;
3997 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3998 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3999 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
4000 tree fndecl
, new_temp
;
4002 auto_vec
<simd_call_arg_info
> arginfo
;
4003 vec
<tree
> vargs
= vNULL
;
4005 tree lhs
, rtype
, ratype
;
4006 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
4009 /* Is STMT a vectorizable call? */
4010 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4014 fndecl
= gimple_call_fndecl (stmt
);
4015 if (fndecl
== NULL_TREE
4016 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
4018 fndecl
= gimple_call_arg (stmt
, 0);
4019 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
4020 fndecl
= TREE_OPERAND (fndecl
, 0);
4021 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
4024 if (fndecl
== NULL_TREE
)
4027 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4028 if (node
== NULL
|| node
->simd_clones
== NULL
)
4031 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4034 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4038 if (gimple_call_lhs (stmt
)
4039 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4042 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4044 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4046 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4053 /* Process function arguments. */
4054 nargs
= gimple_call_num_args (stmt
) - arg_offset
;
4056 /* Bail out if the function has zero arguments. */
4060 arginfo
.reserve (nargs
, true);
4062 for (i
= 0; i
< nargs
; i
++)
4064 simd_call_arg_info thisarginfo
;
4067 thisarginfo
.linear_step
= 0;
4068 thisarginfo
.align
= 0;
4069 thisarginfo
.op
= NULL_TREE
;
4070 thisarginfo
.simd_lane_linear
= false;
4072 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4073 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4074 &thisarginfo
.vectype
)
4075 || thisarginfo
.dt
== vect_uninitialized_def
)
4077 if (dump_enabled_p ())
4078 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4079 "use not simple.\n");
4083 if (thisarginfo
.dt
== vect_constant_def
4084 || thisarginfo
.dt
== vect_external_def
)
4085 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4087 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4089 /* For linear arguments, the analyze phase should have saved
4090 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4091 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4092 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4094 gcc_assert (vec_stmt
);
4095 thisarginfo
.linear_step
4096 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4098 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4099 thisarginfo
.simd_lane_linear
4100 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4101 == boolean_true_node
);
4102 /* If loop has been peeled for alignment, we need to adjust it. */
4103 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4104 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4105 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4107 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4108 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4109 tree opt
= TREE_TYPE (thisarginfo
.op
);
4110 bias
= fold_convert (TREE_TYPE (step
), bias
);
4111 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4113 = fold_build2 (POINTER_TYPE_P (opt
)
4114 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4115 thisarginfo
.op
, bias
);
4119 && thisarginfo
.dt
!= vect_constant_def
4120 && thisarginfo
.dt
!= vect_external_def
4122 && TREE_CODE (op
) == SSA_NAME
4123 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4125 && tree_fits_shwi_p (iv
.step
))
4127 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4128 thisarginfo
.op
= iv
.base
;
4130 else if ((thisarginfo
.dt
== vect_constant_def
4131 || thisarginfo
.dt
== vect_external_def
)
4132 && POINTER_TYPE_P (TREE_TYPE (op
)))
4133 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4134 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4136 if (POINTER_TYPE_P (TREE_TYPE (op
))
4137 && !thisarginfo
.linear_step
4139 && thisarginfo
.dt
!= vect_constant_def
4140 && thisarginfo
.dt
!= vect_external_def
4143 && TREE_CODE (op
) == SSA_NAME
)
4144 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4146 arginfo
.quick_push (thisarginfo
);
4149 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4150 if (!vf
.is_constant ())
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4154 "not considering SIMD clones; not yet supported"
4155 " for variable-width vectors.\n");
4159 unsigned int badness
= 0;
4160 struct cgraph_node
*bestn
= NULL
;
4161 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4162 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4164 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4165 n
= n
->simdclone
->next_clone
)
4167 unsigned int this_badness
= 0;
4168 unsigned int num_calls
;
4169 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4170 || n
->simdclone
->nargs
!= nargs
)
4173 this_badness
+= exact_log2 (num_calls
) * 4096;
4174 if (n
->simdclone
->inbranch
)
4175 this_badness
+= 8192;
4176 int target_badness
= targetm
.simd_clone
.usable (n
);
4177 if (target_badness
< 0)
4179 this_badness
+= target_badness
* 512;
4180 for (i
= 0; i
< nargs
; i
++)
4182 switch (n
->simdclone
->args
[i
].arg_type
)
4184 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4185 if (!useless_type_conversion_p
4186 (n
->simdclone
->args
[i
].orig_type
,
4187 TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
))))
4189 else if (arginfo
[i
].dt
== vect_constant_def
4190 || arginfo
[i
].dt
== vect_external_def
4191 || arginfo
[i
].linear_step
)
4194 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4195 if (arginfo
[i
].dt
!= vect_constant_def
4196 && arginfo
[i
].dt
!= vect_external_def
)
4199 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4200 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4201 if (arginfo
[i
].dt
== vect_constant_def
4202 || arginfo
[i
].dt
== vect_external_def
4203 || (arginfo
[i
].linear_step
4204 != n
->simdclone
->args
[i
].linear_step
))
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4208 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4209 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4216 case SIMD_CLONE_ARG_TYPE_MASK
:
4219 if (i
== (size_t) -1)
4221 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4226 if (arginfo
[i
].align
)
4227 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4228 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4230 if (i
== (size_t) -1)
4232 if (bestn
== NULL
|| this_badness
< badness
)
4235 badness
= this_badness
;
4242 for (i
= 0; i
< nargs
; i
++)
4244 if ((arginfo
[i
].dt
== vect_constant_def
4245 || arginfo
[i
].dt
== vect_external_def
)
4246 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4248 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
+ arg_offset
));
4249 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4251 if (arginfo
[i
].vectype
== NULL
4252 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4253 simd_clone_subparts (arginfo
[i
].vectype
)))
4257 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4258 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4260 if (dump_enabled_p ())
4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4262 "vector mask arguments are not supported.\n");
4266 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4267 && bestn
->simdclone
->mask_mode
== VOIDmode
4268 && (simd_clone_subparts (bestn
->simdclone
->args
[i
].vector_type
)
4269 != simd_clone_subparts (arginfo
[i
].vectype
)))
4271 /* FORNOW we only have partial support for vector-type masks that
4272 can't hold all of simdlen. */
4273 if (dump_enabled_p ())
4274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4276 "in-branch vector clones are not yet"
4277 " supported for mismatched vector sizes.\n");
4280 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
4281 && bestn
->simdclone
->mask_mode
!= VOIDmode
)
4283 /* FORNOW don't support integer-type masks. */
4284 if (dump_enabled_p ())
4285 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4287 "in-branch vector clones are not yet"
4288 " supported for integer mask modes.\n");
4293 fndecl
= bestn
->decl
;
4294 nunits
= bestn
->simdclone
->simdlen
;
4295 ncopies
= vector_unroll_factor (vf
, nunits
);
4297 /* If the function isn't const, only allow it in simd loops where user
4298 has asserted that at least nunits consecutive iterations can be
4299 performed using SIMD instructions. */
4300 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4301 && gimple_vuse (stmt
))
4304 /* Sanity check: make sure that at least one copy of the vectorized stmt
4305 needs to be generated. */
4306 gcc_assert (ncopies
>= 1);
4308 if (!vec_stmt
) /* transformation not required. */
4310 /* When the original call is pure or const but the SIMD ABI dictates
4311 an aggregate return we will have to use a virtual definition and
4312 in a loop eventually even need to add a virtual PHI. That's
4313 not straight-forward so allow to fix this up via renaming. */
4314 if (gimple_call_lhs (stmt
)
4315 && !gimple_vdef (stmt
)
4316 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4317 vinfo
->any_known_not_updated_vssa
= true;
4318 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4319 for (i
= 0; i
< nargs
; i
++)
4320 if ((bestn
->simdclone
->args
[i
].arg_type
4321 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4322 || (bestn
->simdclone
->args
[i
].arg_type
4323 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4325 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4329 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4330 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4331 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4332 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4333 tree sll
= arginfo
[i
].simd_lane_linear
4334 ? boolean_true_node
: boolean_false_node
;
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4337 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4338 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4339 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4340 dt, slp_node, cost_vec); */
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4350 scalar_dest
= gimple_call_lhs (stmt
);
4351 vec_dest
= NULL_TREE
;
4356 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4357 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4358 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4361 rtype
= TREE_TYPE (ratype
);
4365 auto_vec
<vec
<tree
> > vec_oprnds
;
4366 auto_vec
<unsigned> vec_oprnds_i
;
4367 vec_oprnds
.safe_grow_cleared (nargs
, true);
4368 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4369 for (j
= 0; j
< ncopies
; ++j
)
4371 /* Build argument list for the vectorized call. */
4373 vargs
.create (nargs
);
4377 for (i
= 0; i
< nargs
; i
++)
4379 unsigned int k
, l
, m
, o
;
4381 op
= gimple_call_arg (stmt
, i
+ arg_offset
);
4382 switch (bestn
->simdclone
->args
[i
].arg_type
)
4384 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4385 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4386 o
= vector_unroll_factor (nunits
,
4387 simd_clone_subparts (atype
));
4388 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4390 if (simd_clone_subparts (atype
)
4391 < simd_clone_subparts (arginfo
[i
].vectype
))
4393 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4394 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4395 / simd_clone_subparts (atype
));
4396 gcc_assert ((k
& (k
- 1)) == 0);
4399 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4400 ncopies
* o
/ k
, op
,
4402 vec_oprnds_i
[i
] = 0;
4403 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4407 vec_oprnd0
= arginfo
[i
].op
;
4408 if ((m
& (k
- 1)) == 0)
4409 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4411 arginfo
[i
].op
= vec_oprnd0
;
4413 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4415 bitsize_int ((m
& (k
- 1)) * prec
));
4417 = gimple_build_assign (make_ssa_name (atype
),
4419 vect_finish_stmt_generation (vinfo
, stmt_info
,
4421 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4425 k
= (simd_clone_subparts (atype
)
4426 / simd_clone_subparts (arginfo
[i
].vectype
));
4427 gcc_assert ((k
& (k
- 1)) == 0);
4428 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4430 vec_alloc (ctor_elts
, k
);
4433 for (l
= 0; l
< k
; l
++)
4435 if (m
== 0 && l
== 0)
4437 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4441 vec_oprnds_i
[i
] = 0;
4442 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4445 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4446 arginfo
[i
].op
= vec_oprnd0
;
4449 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4453 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4457 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4459 = gimple_build_assign (make_ssa_name (atype
),
4461 vect_finish_stmt_generation (vinfo
, stmt_info
,
4463 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4466 vargs
.safe_push (vec_oprnd0
);
4469 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4471 = gimple_build_assign (make_ssa_name (atype
),
4473 vect_finish_stmt_generation (vinfo
, stmt_info
,
4475 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4480 case SIMD_CLONE_ARG_TYPE_MASK
:
4481 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4482 if (bestn
->simdclone
->mask_mode
!= VOIDmode
)
4484 /* FORNOW: this is disabled above. */
4489 tree elt_type
= TREE_TYPE (atype
);
4490 tree one
= fold_convert (elt_type
, integer_one_node
);
4491 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4492 o
= vector_unroll_factor (nunits
,
4493 simd_clone_subparts (atype
));
4494 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4496 if (simd_clone_subparts (atype
)
4497 < simd_clone_subparts (arginfo
[i
].vectype
))
4499 /* The mask type has fewer elements than simdlen. */
4504 else if (simd_clone_subparts (atype
)
4505 == simd_clone_subparts (arginfo
[i
].vectype
))
4507 /* The SIMD clone function has the same number of
4508 elements as the current function. */
4511 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4515 vec_oprnds_i
[i
] = 0;
4517 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4519 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4520 build_vector_from_val (atype
, one
),
4521 build_vector_from_val (atype
, zero
));
4523 = gimple_build_assign (make_ssa_name (atype
),
4525 vect_finish_stmt_generation (vinfo
, stmt_info
,
4527 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4531 /* The mask type has more elements than simdlen. */
4539 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4540 vargs
.safe_push (op
);
4542 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4543 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4548 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4549 &stmts
, true, NULL_TREE
);
4553 edge pe
= loop_preheader_edge (loop
);
4554 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4555 gcc_assert (!new_bb
);
4557 if (arginfo
[i
].simd_lane_linear
)
4559 vargs
.safe_push (arginfo
[i
].op
);
4562 tree phi_res
= copy_ssa_name (op
);
4563 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4564 add_phi_arg (new_phi
, arginfo
[i
].op
,
4565 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4567 = POINTER_TYPE_P (TREE_TYPE (op
))
4568 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4569 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4570 ? sizetype
: TREE_TYPE (op
);
4572 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4574 tree tcst
= wide_int_to_tree (type
, cst
);
4575 tree phi_arg
= copy_ssa_name (op
);
4577 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4578 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4579 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4580 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4582 arginfo
[i
].op
= phi_res
;
4583 vargs
.safe_push (phi_res
);
4588 = POINTER_TYPE_P (TREE_TYPE (op
))
4589 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4590 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4591 ? sizetype
: TREE_TYPE (op
);
4593 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4595 tree tcst
= wide_int_to_tree (type
, cst
);
4596 new_temp
= make_ssa_name (TREE_TYPE (op
));
4598 = gimple_build_assign (new_temp
, code
,
4599 arginfo
[i
].op
, tcst
);
4600 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4601 vargs
.safe_push (new_temp
);
4604 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4605 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4606 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4615 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4619 || known_eq (simd_clone_subparts (rtype
), nunits
));
4621 new_temp
= create_tmp_var (ratype
);
4622 else if (useless_type_conversion_p (vectype
, rtype
))
4623 new_temp
= make_ssa_name (vec_dest
, new_call
);
4625 new_temp
= make_ssa_name (rtype
, new_call
);
4626 gimple_call_set_lhs (new_call
, new_temp
);
4628 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4629 gimple
*new_stmt
= new_call
;
4633 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4636 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4637 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4638 k
= vector_unroll_factor (nunits
,
4639 simd_clone_subparts (vectype
));
4640 gcc_assert ((k
& (k
- 1)) == 0);
4641 for (l
= 0; l
< k
; l
++)
4646 t
= build_fold_addr_expr (new_temp
);
4647 t
= build2 (MEM_REF
, vectype
, t
,
4648 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4651 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4652 bitsize_int (prec
), bitsize_int (l
* prec
));
4653 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4654 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4656 if (j
== 0 && l
== 0)
4657 *vec_stmt
= new_stmt
;
4658 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4662 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4665 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4667 unsigned int k
= (simd_clone_subparts (vectype
)
4668 / simd_clone_subparts (rtype
));
4669 gcc_assert ((k
& (k
- 1)) == 0);
4670 if ((j
& (k
- 1)) == 0)
4671 vec_alloc (ret_ctor_elts
, k
);
4675 o
= vector_unroll_factor (nunits
,
4676 simd_clone_subparts (rtype
));
4677 for (m
= 0; m
< o
; m
++)
4679 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4680 size_int (m
), NULL_TREE
, NULL_TREE
);
4681 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4683 vect_finish_stmt_generation (vinfo
, stmt_info
,
4685 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4686 gimple_assign_lhs (new_stmt
));
4688 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4691 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4692 if ((j
& (k
- 1)) != k
- 1)
4694 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4696 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4697 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4699 if ((unsigned) j
== k
- 1)
4700 *vec_stmt
= new_stmt
;
4701 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4706 tree t
= build_fold_addr_expr (new_temp
);
4707 t
= build2 (MEM_REF
, vectype
, t
,
4708 build_int_cst (TREE_TYPE (t
), 0));
4709 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4710 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4711 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4713 else if (!useless_type_conversion_p (vectype
, rtype
))
4715 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4717 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4718 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4723 *vec_stmt
= new_stmt
;
4724 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4727 for (i
= 0; i
< nargs
; ++i
)
4729 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4734 /* Mark the clone as no longer being a candidate for GC. */
4735 bestn
->gc_candidate
= false;
4737 /* The call in STMT might prevent it from being removed in dce.
4738 We however cannot remove it here, due to the way the ssa name
4739 it defines is mapped to the new definition. So just replace
4740 rhs of the statement with something harmless. */
4748 type
= TREE_TYPE (scalar_dest
);
4749 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4750 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4753 new_stmt
= gimple_build_nop ();
4754 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4755 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
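/* As an illustration, for a WIDEN_MULT_EXPR with V8HI inputs and a V4SI
   result, supportable_widening_operation typically yields a lo/hi code pair
   such as VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR (the exact codes
   are target-dependent), and this helper is invoked once per code to build
   each half of the widened result.  */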
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i / 2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i + 1) / 2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
					     multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR);
    }

  vec_dsts.quick_push (vec_dest);
}
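/* Each recursion level packs pairs of input vectors into single output
   vectors, so the operand list shrinks by half per step; only the outermost
   call uses CODE, while all intermediate steps use VEC_PACK_TRUNC_EXPR.  */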
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */

static void
vect_create_half_widening_stmts (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1,
				 stmt_vec_info stmt_info, tree vec_dest,
				 gimple_stmt_iterator *gsi,
				 enum tree_code code1,
				 int op_type)
{
  int i;
  tree vop0, vop1;
  gimple *new_stmt1;
  gimple *new_stmt2;
  gimple *new_stmt3;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length ());
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      tree new_tmp1, new_tmp2, new_tmp3, out_type;

      gcc_assert (op_type == binary_op);
      vop1 = (*vec_oprnds1)[i];

      /* Widen the first vector input.  */
      out_type = TREE_TYPE (vec_dest);
      new_tmp1 = make_ssa_name (out_type);
      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
	{
	  /* Widen the second vector input.  */
	  new_tmp2 = make_ssa_name (out_type);
	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
	  /* Perform the operation.  With both vector inputs widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
	}
      else
	{
	  /* Perform the operation.  With the single vector input widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
	}

      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
      gimple_assign_set_lhs (new_stmt3, new_tmp3);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp3);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
4972 vectorizable_conversion (vec_info
*vinfo
,
4973 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4974 gimple
**vec_stmt
, slp_tree slp_node
,
4975 stmt_vector_for_cost
*cost_vec
)
4979 tree op0
, op1
= NULL_TREE
;
4980 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4981 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4982 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4984 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4986 poly_uint64 nunits_in
;
4987 poly_uint64 nunits_out
;
4988 tree vectype_out
, vectype_in
;
4990 tree lhs_type
, rhs_type
;
4991 enum { NARROW
, NONE
, WIDEN
} modifier
;
4992 vec
<tree
> vec_oprnds0
= vNULL
;
4993 vec
<tree
> vec_oprnds1
= vNULL
;
4995 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4996 int multi_step_cvt
= 0;
4997 vec
<tree
> interm_types
= vNULL
;
4998 tree intermediate_type
, cvt_type
= NULL_TREE
;
5000 unsigned short fltsz
;
5002 /* Is STMT a vectorizable conversion? */
5004 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5007 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5011 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5015 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5018 code
= gimple_assign_rhs_code (stmt
);
5019 if (!CONVERT_EXPR_CODE_P (code
)
5020 && code
!= FIX_TRUNC_EXPR
5021 && code
!= FLOAT_EXPR
5022 && code
!= WIDEN_PLUS_EXPR
5023 && code
!= WIDEN_MINUS_EXPR
5024 && code
!= WIDEN_MULT_EXPR
5025 && code
!= WIDEN_LSHIFT_EXPR
)
5028 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
5029 || code
== WIDEN_MINUS_EXPR
5030 || code
== WIDEN_MULT_EXPR
5031 || code
== WIDEN_LSHIFT_EXPR
);
5032 op_type
= TREE_CODE_LENGTH (code
);
5034 /* Check types of lhs and rhs. */
5035 scalar_dest
= gimple_assign_lhs (stmt
);
5036 lhs_type
= TREE_TYPE (scalar_dest
);
5037 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5039 /* Check the operands of the operation. */
5040 slp_tree slp_op0
, slp_op1
= NULL
;
5041 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5042 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5046 "use not simple.\n");
5050 rhs_type
= TREE_TYPE (op0
);
5051 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5052 && !((INTEGRAL_TYPE_P (lhs_type
)
5053 && INTEGRAL_TYPE_P (rhs_type
))
5054 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5055 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5058 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5059 && ((INTEGRAL_TYPE_P (lhs_type
)
5060 && !type_has_mode_precision_p (lhs_type
))
5061 || (INTEGRAL_TYPE_P (rhs_type
)
5062 && !type_has_mode_precision_p (rhs_type
))))
5064 if (dump_enabled_p ())
5065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5066 "type conversion to/from bit-precision unsupported."
5071 if (op_type
== binary_op
)
5073 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
5074 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
5076 op1
= gimple_assign_rhs2 (stmt
);
5078 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5079 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5083 "use not simple.\n");
5086 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5089 vectype_in
= vectype1_in
;
5092 /* If op0 is an external or constant def, infer the vector type
5093 from the scalar type. */
5095 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5097 gcc_assert (vectype_in
);
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5102 "no vectype for scalar type %T\n", rhs_type
);
5107 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5108 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5112 "can't convert between boolean and non "
5113 "boolean vectors %T\n", rhs_type
);
5118 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5119 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5120 if (known_eq (nunits_out
, nunits_in
))
5125 else if (multiple_p (nunits_out
, nunits_in
))
5129 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5133 /* Multiple types in SLP are handled by creating the appropriate number of
5134 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5138 else if (modifier
== NARROW
)
5139 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5141 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5143 /* Sanity check: make sure that at least one copy of the vectorized stmt
5144 needs to be generated. */
5145 gcc_assert (ncopies
>= 1);
5147 bool found_mode
= false;
5148 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5149 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5150 opt_scalar_mode rhs_mode_iter
;
5152 /* Supportable by target? */
5156 if (code
!= FIX_TRUNC_EXPR
5157 && code
!= FLOAT_EXPR
5158 && !CONVERT_EXPR_CODE_P (code
))
5160 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
5164 if (dump_enabled_p ())
5165 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5166 "conversion not supported by target.\n");
5170 if (known_eq (nunits_in
, nunits_out
))
5172 if (!supportable_half_widening_operation (code
, vectype_out
,
5173 vectype_in
, &code1
))
5175 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5178 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5179 vectype_out
, vectype_in
, &code1
,
5180 &code2
, &multi_step_cvt
,
5183 /* Binary widening operation can only be supported directly by the
5185 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5189 if (code
!= FLOAT_EXPR
5190 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5193 fltsz
= GET_MODE_SIZE (lhs_mode
);
5194 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5196 rhs_mode
= rhs_mode_iter
.require ();
5197 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5201 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5202 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5203 if (cvt_type
== NULL_TREE
)
5206 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5208 if (!supportable_convert_operation (code
, vectype_out
,
5209 cvt_type
, &codecvt1
))
5212 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5213 vectype_out
, cvt_type
,
5214 &codecvt1
, &codecvt2
,
5219 gcc_assert (multi_step_cvt
== 0);
5221 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5223 vectype_in
, &code1
, &code2
,
5224 &multi_step_cvt
, &interm_types
))
5234 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5235 codecvt2
= ERROR_MARK
;
5239 interm_types
.safe_push (cvt_type
);
5240 cvt_type
= NULL_TREE
;
5245 gcc_assert (op_type
== unary_op
);
5246 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5247 &code1
, &multi_step_cvt
,
5251 if (code
!= FIX_TRUNC_EXPR
5252 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5256 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5257 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5258 if (cvt_type
== NULL_TREE
)
5260 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5263 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5264 &code1
, &multi_step_cvt
,
5273 if (!vec_stmt
) /* transformation not required. */
5276 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5277 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5281 "incompatible vector types for invariants\n");
5284 DUMP_VECT_SCOPE ("vectorizable_conversion");
5285 if (modifier
== NONE
)
5287 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5288 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5291 else if (modifier
== NARROW
)
5293 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5294 /* The final packing step produces one vector result per copy. */
5295 unsigned int nvectors
5296 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5297 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5298 multi_step_cvt
, cost_vec
,
5303 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5304 /* The initial unpacking step produces two vector results
5305 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5306 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5307 unsigned int nvectors
5309 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5311 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5312 multi_step_cvt
, cost_vec
,
5315 interm_types
.release ();
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_NOTE
, vect_location
,
5322 "transform conversion. ncopies = %d.\n", ncopies
);
5324 if (op_type
== binary_op
)
5326 if (CONSTANT_CLASS_P (op0
))
5327 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5328 else if (CONSTANT_CLASS_P (op1
))
5329 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5332 /* In case of multi-step conversion, we first generate conversion operations
5333 to the intermediate types, and then from that types to the final one.
5334 We create vector destinations for the intermediate type (TYPES) received
5335 from supportable_*_operation, and store them in the correct order
5336 for future use in vect_create_vectorized_*_stmts (). */
5337 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5338 vec_dest
= vect_create_destination_var (scalar_dest
,
5339 (cvt_type
&& modifier
== WIDEN
)
5340 ? cvt_type
: vectype_out
);
5341 vec_dsts
.quick_push (vec_dest
);
5345 for (i
= interm_types
.length () - 1;
5346 interm_types
.iterate (i
, &intermediate_type
); i
--)
5348 vec_dest
= vect_create_destination_var (scalar_dest
,
5350 vec_dsts
.quick_push (vec_dest
);
5355 vec_dest
= vect_create_destination_var (scalar_dest
,
5357 ? vectype_out
: cvt_type
);
5362 if (modifier
== WIDEN
)
5364 else if (modifier
== NARROW
)
5367 ninputs
= vect_pow2 (multi_step_cvt
);
5375 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5377 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5379 /* Arguments are ready, create the new vector stmt. */
5380 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5381 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5382 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5383 gimple_assign_set_lhs (new_stmt
, new_temp
);
5384 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5387 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5389 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5394 /* In case the vectorization factor (VF) is bigger than the number
5395 of elements that we can fit in a vectype (nunits), we have to
5396 generate more than one vector stmt - i.e - we need to "unroll"
5397 the vector stmt by a factor VF/nunits. */
5398 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5400 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5402 if (code
== WIDEN_LSHIFT_EXPR
)
5404 int oprnds_size
= vec_oprnds0
.length ();
5405 vec_oprnds1
.create (oprnds_size
);
5406 for (i
= 0; i
< oprnds_size
; ++i
)
5407 vec_oprnds1
.quick_push (op1
);
5409 /* Arguments are ready. Create the new vector stmts. */
5410 for (i
= multi_step_cvt
; i
>= 0; i
--)
5412 tree this_dest
= vec_dsts
[i
];
5413 enum tree_code c1
= code1
, c2
= code2
;
5414 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5419 if (known_eq (nunits_out
, nunits_in
))
5420 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5421 &vec_oprnds1
, stmt_info
,
5425 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5426 &vec_oprnds1
, stmt_info
,
5431 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5436 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5437 new_temp
= make_ssa_name (vec_dest
);
5438 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5439 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5442 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5445 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5447 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5452 /* In case the vectorization factor (VF) is bigger than the number
5453 of elements that we can fit in a vectype (nunits), we have to
5454 generate more than one vector stmt - i.e - we need to "unroll"
5455 the vector stmt by a factor VF/nunits. */
5456 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5458 /* Arguments are ready. Create the new vector stmts. */
5460 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5462 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5463 new_temp
= make_ssa_name (vec_dest
);
5465 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5466 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5467 vec_oprnds0
[i
] = new_temp
;
5470 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5472 stmt_info
, vec_dsts
, gsi
,
5477 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5479 vec_oprnds0
.release ();
5480 vec_oprnds1
.release ();
5481 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
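/* vectorizable_assignment uses this predicate to skip the cost model for
   plain copies and no-op conversions: it only calls vect_model_simple_cost
   when !vect_nop_conversion_p (stmt_info).  */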
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5518 vectorizable_assignment (vec_info
*vinfo
,
5519 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5520 gimple
**vec_stmt
, slp_tree slp_node
,
5521 stmt_vector_for_cost
*cost_vec
)
5526 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5528 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5532 vec
<tree
> vec_oprnds
= vNULL
;
5534 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5535 enum tree_code code
;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5541 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5545 /* Is vectorizable assignment? */
5546 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5550 scalar_dest
= gimple_assign_lhs (stmt
);
5551 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5554 if (STMT_VINFO_DATA_REF (stmt_info
))
5557 code
= gimple_assign_rhs_code (stmt
);
5558 if (!(gimple_assign_single_p (stmt
)
5559 || code
== PAREN_EXPR
5560 || CONVERT_EXPR_CODE_P (code
)))
5563 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5564 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5566 /* Multiple types in SLP are handled by creating the appropriate number of
5567 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5572 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5574 gcc_assert (ncopies
>= 1);
5577 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5578 &dt
[0], &vectype_in
))
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5582 "use not simple.\n");
5586 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5588 /* We can handle NOP_EXPR conversions that do not change the number
5589 of elements or the vector size. */
5590 if ((CONVERT_EXPR_CODE_P (code
)
5591 || code
== VIEW_CONVERT_EXPR
)
5593 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5594 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5595 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5598 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5599 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5603 "can't convert between boolean and non "
5604 "boolean vectors %T\n", TREE_TYPE (op
));
5609 /* We do not handle bit-precision changes. */
5610 if ((CONVERT_EXPR_CODE_P (code
)
5611 || code
== VIEW_CONVERT_EXPR
)
5612 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5613 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5614 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5615 /* But a conversion that does not change the bit-pattern is ok. */
5616 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5617 > TYPE_PRECISION (TREE_TYPE (op
)))
5618 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5622 "type conversion to/from bit-precision "
5627 if (!vec_stmt
) /* transformation not required. */
5630 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5632 if (dump_enabled_p ())
5633 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5634 "incompatible vector types for invariants\n");
5637 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5638 DUMP_VECT_SCOPE ("vectorizable_assignment");
5639 if (!vect_nop_conversion_p (stmt_info
))
5640 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5650 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5653 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5655 /* Arguments are ready. create the new vector stmt. */
5656 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5658 if (CONVERT_EXPR_CODE_P (code
)
5659 || code
== VIEW_CONVERT_EXPR
)
5660 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5661 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5662 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5663 gimple_assign_set_lhs (new_stmt
, new_temp
);
5664 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5666 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5668 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5671 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5673 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
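/* The lookup above prefers the vector-shifted-by-scalar optab and only falls
   back to the vector-shifted-by-vector optab when the former is not
   implemented for VECTYPE's mode; either one suffices for callers, which
   need only a yes/no answer.  */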
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5722 vectorizable_shift (vec_info
*vinfo
,
5723 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5724 gimple
**vec_stmt
, slp_tree slp_node
,
5725 stmt_vector_for_cost
*cost_vec
)
5729 tree op0
, op1
= NULL
;
5730 tree vec_oprnd1
= NULL_TREE
;
5732 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5733 enum tree_code code
;
5734 machine_mode vec_mode
;
5738 machine_mode optab_op2_mode
;
5739 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5741 poly_uint64 nunits_in
;
5742 poly_uint64 nunits_out
;
5747 vec
<tree
> vec_oprnds0
= vNULL
;
5748 vec
<tree
> vec_oprnds1
= vNULL
;
5751 bool scalar_shift_arg
= true;
5752 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5753 bool incompatible_op1_vectype_p
= false;
5755 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5758 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5759 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5763 /* Is STMT a vectorizable binary/unary operation? */
5764 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5768 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5771 code
= gimple_assign_rhs_code (stmt
);
5773 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5774 || code
== RROTATE_EXPR
))
5777 scalar_dest
= gimple_assign_lhs (stmt
);
5778 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5779 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5783 "bit-precision shifts not supported.\n");
5788 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5789 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5793 "use not simple.\n");
5796 /* If op0 is an external or constant def, infer the vector type
5797 from the scalar type. */
5799 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5801 gcc_assert (vectype
);
5804 if (dump_enabled_p ())
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5806 "no vectype for scalar type\n");
5810 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5811 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5812 if (maybe_ne (nunits_out
, nunits_in
))
5815 stmt_vec_info op1_def_stmt_info
;
5817 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5818 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5822 "use not simple.\n");
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
5832 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5834 gcc_assert (ncopies
>= 1);
5836 /* Determine whether the shift amount is a vector, or scalar. If the
5837 shift/rotate amount is a vector, use the vector/vector shift optabs. */
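  /* For illustration (a sketch, not part of the original source):

       for (i = 0; i < n; i++)  a[i] = b[i] << amount;   // invariant amount
       for (i = 0; i < n; i++)  a[i] = b[i] << c[i];     // per-element amount

     The first form can use the vector-shifted-by-scalar optab, the second
     needs the vector-shifted-by-vector optab.  */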
5839 if ((dt
[1] == vect_internal_def
5840 || dt
[1] == vect_induction_def
5841 || dt
[1] == vect_nested_cycle
)
5843 scalar_shift_arg
= false;
5844 else if (dt
[1] == vect_constant_def
5845 || dt
[1] == vect_external_def
5846 || dt
[1] == vect_internal_def
)
	  /* In SLP, need to check whether the shift count is the same,
	     in loops if it is a constant or invariant, it is always
	     the same.  */
5853 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5854 stmt_vec_info slpstmt_info
;
5856 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5858 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5859 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5860 scalar_shift_arg
= false;
	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ??? For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
5868 if (dt
[1] == vect_internal_def
5869 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5871 scalar_shift_arg
= false;
      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
5877 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5878 scalar_shift_arg
= false;
5882 if (dump_enabled_p ())
5883 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5884 "operand mode requires invariant argument.\n");
5888 /* Vector shifted by vector. */
5889 bool was_scalar_shift_arg
= scalar_shift_arg
;
5890 if (!scalar_shift_arg
)
5892 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5893 if (dump_enabled_p ())
5894 dump_printf_loc (MSG_NOTE
, vect_location
,
5895 "vector/vector shift/rotate found.\n");
5898 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5900 incompatible_op1_vectype_p
5901 = (op1_vectype
== NULL_TREE
5902 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5903 TYPE_VECTOR_SUBPARTS (vectype
))
5904 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5905 if (incompatible_op1_vectype_p
5907 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5908 || slp_op1
->refcnt
!= 1))
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5912 "unusable type for last operand in"
5913 " vector/vector shift/rotate.\n");
5917 /* See if the machine has a vector shifted by scalar insn and if not
5918 then see if it has a vector shifted by vector insn. */
5921 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5923 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_NOTE
, vect_location
,
5927 "vector/scalar shift/rotate found.\n");
5931 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5933 && (optab_handler (optab
, TYPE_MODE (vectype
))
5934 != CODE_FOR_nothing
))
5936 scalar_shift_arg
= false;
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_NOTE
, vect_location
,
5940 "vector/vector shift/rotate found.\n");
5943 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5947 /* Unlike the other binary operators, shifts/rotates have
5948 the rhs being int, instead of the same type as the lhs,
5949 so make sure the scalar is the right type if we are
5950 dealing with vectors of long long/long/short/char. */
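	  /* Illustration (sketch only): for

	       long long a[N]; int s[N];
	       ... a[i] << s[i] ...

	     the int shift amounts must be converted to the element type of
	     the long long vector before a vector/vector shift can be
	     emitted.  */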
5951 incompatible_op1_vectype_p
5953 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5955 if (incompatible_op1_vectype_p
5956 && dt
[1] == vect_internal_def
)
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5960 "unusable type for last operand in"
5961 " vector/vector shift/rotate.\n");
5968 /* Supportable by target? */
5971 if (dump_enabled_p ())
5972 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5976 vec_mode
= TYPE_MODE (vectype
);
5977 icode
= (int) optab_handler (optab
, vec_mode
);
5978 if (icode
== CODE_FOR_nothing
)
5980 if (dump_enabled_p ())
5981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5982 "op not supported by target.\n");
5985 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5986 if (vect_emulated_vector_p (vectype
))
5989 if (!vec_stmt
) /* transformation not required. */
5992 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5993 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5994 && (!incompatible_op1_vectype_p
5995 || dt
[1] == vect_constant_def
)
5996 && !vect_maybe_update_slp_op_vectype
5998 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6002 "incompatible vector types for invariants\n");
6005 /* Now adjust the constant shift amount in place. */
6007 && incompatible_op1_vectype_p
6008 && dt
[1] == vect_constant_def
)
6010 for (unsigned i
= 0;
6011 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
6013 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
6014 = fold_convert (TREE_TYPE (vectype
),
6015 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
6016 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
6020 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
6021 DUMP_VECT_SCOPE ("vectorizable_shift");
6022 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
6023 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
6029 if (dump_enabled_p ())
6030 dump_printf_loc (MSG_NOTE
, vect_location
,
6031 "transform binary/unary operation.\n");
6033 if (incompatible_op1_vectype_p
&& !slp_node
)
6035 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
6036 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6037 if (dt
[1] != vect_constant_def
)
6038 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
6039 TREE_TYPE (vectype
), NULL
);
6043 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6045 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
	  /* Vector shl and shr insn patterns can be defined with scalar
	     operand 2 (shift operand).  In this case, use constant or loop
	     invariant op1 directly, without extending it to vector mode
	     first.  */
6051 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
6052 if (!VECTOR_MODE_P (optab_op2_mode
))
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_NOTE
, vect_location
,
6056 "operand 1 using scalar mode.\n");
6058 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
6059 vec_oprnds1
.quick_push (vec_oprnd1
);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
6066 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
6067 vec_oprnds1
.quick_push (vec_oprnd1
);
6070 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
6072 if (was_scalar_shift_arg
)
6074 /* If the argument was the same in all lanes create
6075 the correctly typed vector shift amount directly. */
6076 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
6077 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
6078 !loop_vinfo
? gsi
: NULL
);
6079 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
6080 !loop_vinfo
? gsi
: NULL
);
6081 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
6082 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
6083 vec_oprnds1
.quick_push (vec_oprnd1
);
6085 else if (dt
[1] == vect_constant_def
)
6086 /* The constant shift amount has been adjusted in place. */
6089 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
6092 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6093 (a special case for certain kind of vector shifts); otherwise,
6094 operand 1 should be of a vector type (the usual case). */
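  /* Illustration (sketch): a target may provide a vector shift pattern,
     e.g. an ashl<mode>3 insn, whose operand 2 is a plain scalar mode; in
     that case the single op1 value is reused for every copy instead of
     being broadcast into a vector operand.  */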
6095 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6097 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
6099 /* Arguments are ready. Create the new vector stmt. */
6100 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6102 /* For internal defs where we need to use a scalar shift arg
6103 extract the first lane. */
6104 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
6106 vop1
= vec_oprnds1
[0];
6107 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
6109 = gimple_build_assign (new_temp
,
6110 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
6112 TYPE_SIZE (TREE_TYPE (new_temp
)),
6113 bitsize_zero_node
));
6114 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6118 vop1
= vec_oprnds1
[i
];
6119 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6120 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6121 gimple_assign_set_lhs (new_stmt
, new_temp
);
6122 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6124 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6126 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6130 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6132 vec_oprnds0
.release ();
6133 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6157 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6158 enum tree_code code
, orig_code
;
6159 machine_mode vec_mode
;
6163 bool target_support_p
;
6164 enum vect_def_type dt
[3]
6165 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6167 poly_uint64 nunits_in
;
6168 poly_uint64 nunits_out
;
6170 int ncopies
, vec_num
;
6172 vec
<tree
> vec_oprnds0
= vNULL
;
6173 vec
<tree
> vec_oprnds1
= vNULL
;
6174 vec
<tree
> vec_oprnds2
= vNULL
;
6175 tree vop0
, vop1
, vop2
;
6176 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6178 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6181 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6185 /* Is STMT a vectorizable binary/unary operation? */
6186 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6190 /* Loads and stores are handled in vectorizable_{load,store}. */
6191 if (STMT_VINFO_DATA_REF (stmt_info
))
6194 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6196 /* Shifts are handled in vectorizable_shift. */
6197 if (code
== LSHIFT_EXPR
6198 || code
== RSHIFT_EXPR
6199 || code
== LROTATE_EXPR
6200 || code
== RROTATE_EXPR
)
6203 /* Comparisons are handled in vectorizable_comparison. */
6204 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6207 /* Conditions are handled in vectorizable_condition. */
6208 if (code
== COND_EXPR
)
6211 /* For pointer addition and subtraction, we should use the normal
6212 plus and minus for the vector operation. */
6213 if (code
== POINTER_PLUS_EXPR
)
6215 if (code
== POINTER_DIFF_EXPR
)
6218 /* Support only unary or binary operations. */
6219 op_type
= TREE_CODE_LENGTH (code
);
6220 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6224 "num. args = %d (not unary/binary/ternary op).\n",
6229 scalar_dest
= gimple_assign_lhs (stmt
);
6230 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
6234 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6236 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6237 /* Exception are bitwise binary operations. */
6238 && code
!= BIT_IOR_EXPR
6239 && code
!= BIT_XOR_EXPR
6240 && code
!= BIT_AND_EXPR
)
6242 if (dump_enabled_p ())
6243 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6244 "bit-precision arithmetic not supported.\n");
6249 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6250 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6254 "use not simple.\n");
6257 bool is_invariant
= (dt
[0] == vect_external_def
6258 || dt
[0] == vect_constant_def
);
6259 /* If op0 is an external or constant def, infer the vector type
6260 from the scalar type. */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
6268 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6270 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6272 if (dump_enabled_p ())
6273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6274 "not supported operation on bool value.\n");
6277 vectype
= vectype_out
;
6280 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6284 gcc_assert (vectype
);
6287 if (dump_enabled_p ())
6288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6289 "no vectype for scalar type %T\n",
6295 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6296 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6297 if (maybe_ne (nunits_out
, nunits_in
))
6300 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6301 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6302 if (op_type
== binary_op
|| op_type
== ternary_op
)
6304 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6305 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6309 "use not simple.\n");
6312 is_invariant
&= (dt
[1] == vect_external_def
6313 || dt
[1] == vect_constant_def
);
6315 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6318 if (op_type
== ternary_op
)
6320 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6321 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6325 "use not simple.\n");
6328 is_invariant
&= (dt
[2] == vect_external_def
6329 || dt
[2] == vect_constant_def
);
6331 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
6341 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6345 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6349 gcc_assert (ncopies
>= 1);
6351 /* Reject attempts to combine mask types with nonmask types, e.g. if
6352 we have an AND between a (nonmask) boolean loaded from memory and
6353 a (mask) boolean result of a comparison.
6355 TODO: We could easily fix these cases up using pattern statements. */
6356 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6357 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6358 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6360 if (dump_enabled_p ())
6361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6362 "mixed mask and nonmask vector types\n");
6366 /* Supportable by target? */
6368 vec_mode
= TYPE_MODE (vectype
);
6369 if (code
== MULT_HIGHPART_EXPR
)
6370 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6373 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6376 if (dump_enabled_p ())
6377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6381 target_support_p
= (optab_handler (optab
, vec_mode
)
6382 != CODE_FOR_nothing
);
6385 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6386 if (!target_support_p
)
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6390 "op not supported by target.\n");
6391 /* Check only during analysis. */
6392 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6393 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6395 if (dump_enabled_p ())
6396 dump_printf_loc (MSG_NOTE
, vect_location
,
6397 "proceeding using word mode.\n");
6398 using_emulated_vectors_p
= true;
6401 if (using_emulated_vectors_p
6402 && !vect_can_vectorize_without_simd_p (code
))
6404 if (dump_enabled_p ())
6405 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6409 /* ??? We should instead expand the operations here, instead of
6410 relying on vector lowering which has this hard cap on the number
6411 of vector elements below it performs elementwise operations. */
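  /* Illustration (sketch): with a 64-bit word and 16-bit elements,
     BITS_PER_WORD / vector_element_bits (vectype) == 4, so a four-element
     V4HI plus can still be emulated with word-mode arithmetic; with fewer
     than four elements per word the emulation is not considered
     worthwhile, hence the check below.  */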
6412 if (using_emulated_vectors_p
6413 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== NEGATE_EXPR
)
6414 && ((BITS_PER_WORD
/ vector_element_bits (vectype
)) < 4
6415 || maybe_lt (nunits_out
, 4U)))
6417 if (dump_enabled_p ())
6418 dump_printf (MSG_NOTE
, "not using word mode for +- and less than "
6419 "four vector elements\n");
6423 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6424 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6425 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6427 /* If operating on inactive elements could generate spurious traps,
6428 we need to restrict the operation to active lanes. Note that this
6429 specifically doesn't apply to unhoisted invariants, since they
6430 operate on the same value for every lane.
6432 Similarly, if this operation is part of a reduction, a fully-masked
6433 loop should only change the active lanes of the reduction chain,
6434 keeping the inactive lanes as-is. */
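  /* Illustration (sketch): in a fully-masked loop a potentially trapping
     operation such as

       for (i = 0; i < n; i++)  a[i] = b[i] / c[i];

     is emitted as a conditional internal function, e.g.
     .COND_DIV (loop_mask, vb, vc, else_value), so that lanes beyond the
     trip count never execute the division.  */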
6435 bool mask_out_inactive
= ((!is_invariant
&& gimple_could_trap_p (stmt
))
6438 if (!vec_stmt
) /* transformation not required. */
6441 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6442 && mask_out_inactive
)
6444 if (cond_fn
== IFN_LAST
6445 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6446 OPTIMIZE_FOR_SPEED
))
6448 if (dump_enabled_p ())
6449 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6450 "can't use a fully-masked loop because no"
6451 " conditional operation is available.\n");
6452 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6455 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6459 /* Put types on constant and invariant SLP children. */
6461 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6462 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6463 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6465 if (dump_enabled_p ())
6466 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6467 "incompatible vector types for invariants\n");
6471 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6472 DUMP_VECT_SCOPE ("vectorizable_operation");
6473 vect_model_simple_cost (vinfo
, stmt_info
,
6474 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6475 if (using_emulated_vectors_p
)
6477 /* The above vect_model_simple_cost call handles constants
6478 in the prologue and (mis-)costs one of the stmts as
6479 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6480 for the actual lowering that will be applied. */
6482 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6496 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6503 if (dump_enabled_p ())
6504 dump_printf_loc (MSG_NOTE
, vect_location
,
6505 "transform binary/unary operation.\n");
6507 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6509 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6510 vectors with unsigned elements, but the result is signed. So, we
6511 need to compute the MINUS_EXPR into vectype temporary and
6512 VIEW_CONVERT_EXPR it into the final vectype_out result. */
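  /* Illustration (sketch, not part of the original source): for

       ptrdiff_t d = p[i] - q[i];   // POINTER_DIFF_EXPR on pointer operands

     the subtraction is performed on vectors with unsigned elements and the
     signed result is produced by a VIEW_CONVERT_EXPR into vectype_out.  */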
6513 tree vec_cvt_dest
= NULL_TREE
;
6514 if (orig_code
== POINTER_DIFF_EXPR
)
6516 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6517 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6521 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
6576 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6577 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6578 /* Arguments are ready. Create the new vector stmt. */
6579 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6581 gimple
*new_stmt
= NULL
;
6582 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6583 ? vec_oprnds1
[i
] : NULL_TREE
);
6584 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6585 if (masked_loop_p
&& mask_out_inactive
)
6587 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6589 auto_vec
<tree
> vops (5);
6590 vops
.quick_push (mask
);
6591 vops
.quick_push (vop0
);
6593 vops
.quick_push (vop1
);
6595 vops
.quick_push (vop2
);
6598 /* Perform the operation on active elements only and take
6599 inactive elements from the reduction chain input. */
6601 vops
.quick_push (reduc_idx
== 1 ? vop1
: vop0
);
6605 auto else_value
= targetm
.preferred_else_value
6606 (cond_fn
, vectype
, vops
.length () - 1, &vops
[1]);
6607 vops
.quick_push (else_value
);
6609 gcall
*call
= gimple_build_call_internal_vec (cond_fn
, vops
);
6610 new_temp
= make_ssa_name (vec_dest
, call
);
6611 gimple_call_set_lhs (call
, new_temp
);
6612 gimple_call_set_nothrow (call
, true);
6613 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6618 tree mask
= NULL_TREE
;
6619 /* When combining two masks check if either of them is elsewhere
6620 combined with a loop mask, if that's the case we can mark that the
6621 new combined mask doesn't need to be combined with a loop mask. */
6623 && code
== BIT_AND_EXPR
6624 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6626 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6629 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6632 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6636 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6639 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6642 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6647 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6648 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6649 gimple_assign_set_lhs (new_stmt
, new_temp
);
6650 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6651 if (using_emulated_vectors_p
)
6652 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
6654 /* Enter the combined value into the vector cond hash so we don't
6655 AND it with a loop mask again. */
6657 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6661 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6662 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6664 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6665 gimple_assign_set_lhs (new_stmt
, new_temp
);
6666 vect_finish_stmt_generation (vinfo
, stmt_info
,
6671 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6673 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6677 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6679 vec_oprnds0
.release ();
6680 vec_oprnds1
.release ();
6681 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
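/* Usage note (illustration only, hypothetical types): for an interleaved
   group storing to

     struct { int a; float b; } *p;   // p[i].a = ...; p[i].b = ...;

   the int and float members have different alias sets, so the group falls
   back to ptr_type_node (alias set 0) for its vectorized accesses.  */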
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
6755 tree ref
[2] = { ref1
, ref2
};
6756 poly_int64 bitsize
[2], bitpos
[2];
6757 tree offset
[2], base
[2];
6758 for (int i
= 0; i
< 2; ++i
)
6761 int unsignedp
, reversep
, volatilep
= 0;
6762 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6763 &offset
[i
], &mode
, &unsignedp
,
6764 &reversep
, &volatilep
);
6765 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6767 if (TREE_CODE (base
[i
]) == MEM_REF
6768 && offset
[i
] == NULL_TREE
6769 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6771 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6772 if (is_gimple_assign (def_stmt
)
6773 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6774 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6775 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6777 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6779 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6780 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6785 if (!operand_equal_p (base
[0], base
[1], 0))
6787 if (maybe_ne (bitsize
[0], bitsize
[1]))
6789 if (offset
[0] != offset
[1])
6791 if (!offset
[0] || !offset
[1])
6793 if (!operand_equal_p (offset
[0], offset
[1], 0))
6796 for (int i
= 0; i
< 2; ++i
)
6798 step
[i
] = integer_one_node
;
6799 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6801 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6802 if (is_gimple_assign (def_stmt
)
6803 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6804 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6807 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6808 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6811 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6813 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6814 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6816 tree rhs1
= NULL_TREE
;
6817 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6819 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6820 if (gimple_assign_cast_p (def_stmt
))
6821 rhs1
= gimple_assign_rhs1 (def_stmt
);
6823 else if (CONVERT_EXPR_P (offset
[i
]))
6824 rhs1
= TREE_OPERAND (offset
[i
], 0);
6826 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6827 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6828 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6829 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6832 if (!operand_equal_p (offset
[0], offset
[1], 0)
6833 || !operand_equal_p (step
[0], step
[1], 0))
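/* Illustration (sketch): the references compared above are typically
   accesses such as D.2042[_20], i.e. the same "omp simd array" base indexed
   by the same .GOMP_SIMD_LANE result, possibly hidden behind a widening
   cast or a multiplication by the element size; anything that does not
   decompose to an equal base, offset and step is rejected.  */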
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};
/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
6863 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6864 unsigned HOST_WIDE_INT nunits
;
6865 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6867 int units_log2
= exact_log2 (nunits
);
6868 if (units_log2
<= 0)
6872 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6873 for (i
= 0; i
<= units_log2
; ++i
)
6875 unsigned HOST_WIDE_INT j
, k
;
6876 enum scan_store_kind kind
= scan_store_kind_perm
;
6877 vec_perm_builder
sel (nunits
, nunits
, 1);
6878 sel
.quick_grow (nunits
);
6879 if (i
== units_log2
)
6881 for (j
= 0; j
< nunits
; ++j
)
6882 sel
[j
] = nunits
- 1;
6886 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6888 for (k
= 0; j
< nunits
; ++j
, ++k
)
6889 sel
[j
] = nunits
+ k
;
6891 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6892 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
6894 if (i
== units_log2
)
6897 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6899 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6901 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6902 /* Whole vector shifts shift in zeros, so if init is all zero
6903 constant, there is no need to do anything further. */
6904 if ((TREE_CODE (init
) != INTEGER_CST
6905 && TREE_CODE (init
) != REAL_CST
)
6906 || !initializer_zerop (init
))
6908 tree masktype
= truth_type_for (vectype
);
6909 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6911 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6914 kind
= whole_vector_shift_kind
;
6916 if (use_whole_vector
)
6918 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6919 use_whole_vector
->safe_grow_cleared (i
, true);
6920 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6921 use_whole_vector
->safe_push (kind
);
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
6938 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6939 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6942 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6945 || memory_access_type
!= VMAT_CONTIGUOUS
6946 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6947 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6948 || loop_vinfo
== NULL
6949 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6950 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6951 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6952 || !integer_zerop (DR_INIT (dr_info
->dr
))
6953 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6954 || !alias_sets_conflict_p (get_alias_set (vectype
),
6955 get_alias_set (TREE_TYPE (ref_type
))))
6957 if (dump_enabled_p ())
6958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6959 "unsupported OpenMP scan store.\n");
6963 /* We need to pattern match code built by OpenMP lowering and simplified
6964 by following optimizations into something we can handle.
6965 #pragma omp simd reduction(inscan,+:r)
6969 #pragma omp scan inclusive (r)
6972 shall have body with:
6973 // Initialization for input phase, store the reduction initializer:
6974 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6975 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6977 // Actual input phase:
6979 r.0_5 = D.2042[_20];
6982 // Initialization for scan phase:
6983 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6989 // Actual scan phase:
6991 r.1_8 = D.2042[_20];
6993 The "omp simd array" variable D.2042 holds the privatized copy used
6994 inside of the loop and D.2043 is another one that holds copies of
6995 the current original list item. The separate GOMP_SIMD_LANE ifn
6996 kinds are there in order to allow optimizing the initializer store
6997 and combiner sequence, e.g. if it is originally some C++ish user
6998 defined reduction, but allow the vectorizer to pattern recognize it
6999 and turn into the appropriate vectorized scan.
7001 For exclusive scan, this is slightly different:
7002 #pragma omp simd reduction(inscan,+:r)
7006 #pragma omp scan exclusive (r)
7009 shall have body with:
7010 // Initialization for input phase, store the reduction initializer:
7011 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7012 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7014 // Actual input phase:
7016 r.0_5 = D.2042[_20];
7019 // Initialization for scan phase:
7020 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7026 // Actual scan phase:
7028 r.1_8 = D.2044[_20];
7031 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7033 /* Match the D.2042[_21] = 0; store above. Just require that
7034 it is a constant or external definition store. */
7035 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7038 if (dump_enabled_p ())
7039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7040 "unsupported OpenMP scan initializer store.\n");
7044 if (! loop_vinfo
->scan_map
)
7045 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7046 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7047 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7050 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7052 /* These stores can be vectorized normally. */
7056 if (rhs_dt
!= vect_internal_def
)
7059 if (dump_enabled_p ())
7060 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7061 "unsupported OpenMP scan combiner pattern.\n");
7065 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7066 tree rhs
= gimple_assign_rhs1 (stmt
);
7067 if (TREE_CODE (rhs
) != SSA_NAME
)
7070 gimple
*other_store_stmt
= NULL
;
7071 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7072 bool inscan_var_store
7073 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7075 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7077 if (!inscan_var_store
)
7079 use_operand_p use_p
;
7080 imm_use_iterator iter
;
7081 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7083 gimple
*use_stmt
= USE_STMT (use_p
);
7084 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7086 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7087 || !is_gimple_assign (use_stmt
)
7088 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7090 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7092 other_store_stmt
= use_stmt
;
7094 if (other_store_stmt
== NULL
)
7096 rhs
= gimple_assign_lhs (other_store_stmt
);
7097 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
7101 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
7103 use_operand_p use_p
;
7104 imm_use_iterator iter
;
7105 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7107 gimple
*use_stmt
= USE_STMT (use_p
);
7108 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7110 if (other_store_stmt
)
7112 other_store_stmt
= use_stmt
;
7118 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7119 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
7120 || !is_gimple_assign (def_stmt
)
7121 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
7124 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
7129 case POINTER_PLUS_EXPR
:
7132 case MULT_HIGHPART_EXPR
:
7137 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
7140 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7141 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7142 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
7145 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7146 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7147 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
7148 || !gimple_assign_load_p (load1_stmt
)
7149 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
7150 || !gimple_assign_load_p (load2_stmt
))
7153 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7154 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7155 if (load1_stmt_info
== NULL
7156 || load2_stmt_info
== NULL
7157 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7158 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7159 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7160 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7163 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7165 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7166 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7167 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7169 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7171 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7175 use_operand_p use_p
;
7176 imm_use_iterator iter
;
7177 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7179 gimple
*use_stmt
= USE_STMT (use_p
);
7180 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7182 if (other_store_stmt
)
7184 other_store_stmt
= use_stmt
;
7188 if (other_store_stmt
== NULL
)
7190 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7191 || !gimple_store_p (other_store_stmt
))
7194 stmt_vec_info other_store_stmt_info
7195 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7196 if (other_store_stmt_info
== NULL
7197 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7198 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7201 gimple
*stmt1
= stmt
;
7202 gimple
*stmt2
= other_store_stmt
;
7203 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7204 std::swap (stmt1
, stmt2
);
7205 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7206 gimple_assign_rhs1 (load2_stmt
)))
7208 std::swap (rhs1
, rhs2
);
7209 std::swap (load1_stmt
, load2_stmt
);
7210 std::swap (load1_stmt_info
, load2_stmt_info
);
7212 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7213 gimple_assign_rhs1 (load1_stmt
)))
7216 tree var3
= NULL_TREE
;
7217 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7218 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7219 gimple_assign_rhs1 (load2_stmt
)))
7221 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7223 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7224 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7225 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7227 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7228 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7229 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7230 || lookup_attribute ("omp simd inscan exclusive",
7231 DECL_ATTRIBUTES (var3
)))
7235 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7236 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7237 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7240 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7241 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7242 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7243 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7244 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7245 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7248 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7249 std::swap (var1
, var2
);
7251 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7253 if (!lookup_attribute ("omp simd inscan exclusive",
7254 DECL_ATTRIBUTES (var1
)))
7259 if (loop_vinfo
->scan_map
== NULL
)
7261 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7265 /* The IL is as expected, now check if we can actually vectorize it.
7272 should be vectorized as (where _40 is the vectorized rhs
7273 from the D.2042[_21] = 0; store):
7274 _30 = MEM <vector(8) int> [(int *)&D.2043];
7275 _31 = MEM <vector(8) int> [(int *)&D.2042];
7276 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7278 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7279 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7281 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7282 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7283 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7285 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7286 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7288 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7289 MEM <vector(8) int> [(int *)&D.2043] = _39;
7290 MEM <vector(8) int> [(int *)&D.2042] = _38;
7297 should be vectorized as (where _40 is the vectorized rhs
7298 from the D.2042[_21] = 0; store):
7299 _30 = MEM <vector(8) int> [(int *)&D.2043];
7300 _31 = MEM <vector(8) int> [(int *)&D.2042];
7301 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7302 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7304 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7305 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7306 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7308 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7309 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7310 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7312 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7313 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7316 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7317 MEM <vector(8) int> [(int *)&D.2044] = _39;
7318 MEM <vector(8) int> [(int *)&D.2042] = _51; */
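  /* In short (a sketch of the scheme above): for nunits == 8 an inclusive
     scan takes log2 (8) == 3 shift-and-add steps,

       v += shift_in (v, 1);   // lanes now hold sums of 2 adjacent elements
       v += shift_in (v, 2);   // sums of 4 elements
       v += shift_in (v, 4);   // full prefix sums

     where shift_in stands for the VEC_PERM_EXPRs shown above, plus one
     final broadcast of the last lane to carry the running total into the
     next vector iteration.  */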
7319 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7320 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7321 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7324 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7325 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
7342 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7343 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7344 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7345 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7347 if (dump_enabled_p ())
7348 dump_printf_loc (MSG_NOTE
, vect_location
,
7349 "transform scan store. ncopies = %d\n", ncopies
);
7351 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7352 tree rhs
= gimple_assign_rhs1 (stmt
);
7353 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7355 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7356 bool inscan_var_store
7357 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7359 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7361 use_operand_p use_p
;
7362 imm_use_iterator iter
;
7363 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7365 gimple
*use_stmt
= USE_STMT (use_p
);
7366 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7368 rhs
= gimple_assign_lhs (use_stmt
);
7373 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7374 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7375 if (code
== POINTER_PLUS_EXPR
)
7377 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7378 && commutative_tree_code (code
));
7379 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7380 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7381 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7382 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7383 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7384 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7385 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7386 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7387 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7388 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7389 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7391 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7393 std::swap (rhs1
, rhs2
);
7394 std::swap (var1
, var2
);
7395 std::swap (load1_dr_info
, load2_dr_info
);
7398 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7401 unsigned HOST_WIDE_INT nunits
;
7402 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7404 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7405 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7406 gcc_assert (units_log2
> 0);
7407 auto_vec
<tree
, 16> perms
;
7408 perms
.quick_grow (units_log2
+ 1);
7409 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7410 for (int i
= 0; i
<= units_log2
; ++i
)
7412 unsigned HOST_WIDE_INT j
, k
;
7413 vec_perm_builder
sel (nunits
, nunits
, 1);
7414 sel
.quick_grow (nunits
);
7415 if (i
== units_log2
)
7416 for (j
= 0; j
< nunits
; ++j
)
7417 sel
[j
] = nunits
- 1;
7420 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7422 for (k
= 0; j
< nunits
; ++j
, ++k
)
7423 sel
[j
] = nunits
+ k
;
7425 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7426 if (!use_whole_vector
.is_empty ()
7427 && use_whole_vector
[i
] != scan_store_kind_perm
)
7429 if (zero_vec
== NULL_TREE
)
7430 zero_vec
= build_zero_cst (vectype
);
7431 if (masktype
== NULL_TREE
7432 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7433 masktype
= truth_type_for (vectype
);
7434 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7437 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7440 tree vec_oprnd1
= NULL_TREE
;
7441 tree vec_oprnd2
= NULL_TREE
;
7442 tree vec_oprnd3
= NULL_TREE
;
7443 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7444 tree dataref_offset
= build_int_cst (ref_type
, 0);
7445 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7446 vectype
, VMAT_CONTIGUOUS
);
7447 tree ldataref_ptr
= NULL_TREE
;
7448 tree orig
= NULL_TREE
;
7449 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7450 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7451 auto_vec
<tree
> vec_oprnds1
;
7452 auto_vec
<tree
> vec_oprnds2
;
7453 auto_vec
<tree
> vec_oprnds3
;
7454 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7455 *init
, &vec_oprnds1
,
7456 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7457 rhs2
, &vec_oprnds3
);
7458 for (int j
= 0; j
< ncopies
; j
++)
7460 vec_oprnd1
= vec_oprnds1
[j
];
7461 if (ldataref_ptr
== NULL
)
7462 vec_oprnd2
= vec_oprnds2
[j
];
7463 vec_oprnd3
= vec_oprnds3
[j
];
7466 else if (!inscan_var_store
)
7467 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7471 vec_oprnd2
= make_ssa_name (vectype
);
7472 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7473 unshare_expr (ldataref_ptr
),
7475 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7476 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7477 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7478 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7479 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7482 tree v
= vec_oprnd2
;
7483 for (int i
= 0; i
< units_log2
; ++i
)
7485 tree new_temp
= make_ssa_name (vectype
);
7486 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7488 && (use_whole_vector
[i
]
7489 != scan_store_kind_perm
))
7490 ? zero_vec
: vec_oprnd1
, v
,
7492 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7493 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7494 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7496 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7498 /* Whole vector shift shifted in zero bits, but if *init
7499 is not initializer_zerop, we need to replace those elements
7500 with elements from vec_oprnd1. */
7501 tree_vector_builder
vb (masktype
, nunits
, 1);
7502 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7503 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7504 ? boolean_false_node
: boolean_true_node
);
7506 tree new_temp2
= make_ssa_name (vectype
);
7507 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7508 new_temp
, vec_oprnd1
);
7509 vect_finish_stmt_generation (vinfo
, stmt_info
,
7511 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7512 new_temp
= new_temp2
;
7515 /* For exclusive scan, perform the perms[i] permutation once
7518 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7526 tree new_temp2
= make_ssa_name (vectype
);
7527 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7528 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7529 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7534 tree new_temp
= make_ssa_name (vectype
);
7535 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7536 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7537 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7539 tree last_perm_arg
= new_temp
;
7540 /* For exclusive scan, new_temp computed above is the exclusive scan
7541 prefix sum. Turn it into inclusive prefix sum for the broadcast
7542 of the last element into orig. */
7543 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7545 last_perm_arg
= make_ssa_name (vectype
);
7546 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7547 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7548 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7551 orig
= make_ssa_name (vectype
);
7552 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7553 last_perm_arg
, perms
[units_log2
]);
7554 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7555 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7557 if (!inscan_var_store
)
7559 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7560 unshare_expr (dataref_ptr
),
7562 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7563 g
= gimple_build_assign (data_ref
, new_temp
);
7564 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7565 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7569 if (inscan_var_store
)
7570 for (int j
= 0; j
< ncopies
; j
++)
7573 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7575 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7576 unshare_expr (dataref_ptr
),
7578 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7579 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7580 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7581 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
7603 tree vec_oprnd
= NULL_TREE
;
7605 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7606 class loop
*loop
= NULL
;
7607 machine_mode vec_mode
;
7609 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7610 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7611 tree dataref_ptr
= NULL_TREE
;
7612 tree dataref_offset
= NULL_TREE
;
7613 gimple
*ptr_incr
= NULL
;
7616 stmt_vec_info first_stmt_info
;
7618 unsigned int group_size
, i
;
7619 vec
<tree
> oprnds
= vNULL
;
7620 vec
<tree
> result_chain
= vNULL
;
7621 vec
<tree
> vec_oprnds
= vNULL
;
7622 bool slp
= (slp_node
!= NULL
);
7623 unsigned int vec_num
;
7624 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7626 gather_scatter_info gs_info
;
7628 vec_load_store_type vls_type
;
7631 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7634 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7638 /* Is vectorizable store? */
7640 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7641 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7643 tree scalar_dest
= gimple_assign_lhs (assign
);
7644 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7645 && is_pattern_stmt_p (stmt_info
))
7646 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7647 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7648 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7649 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7650 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7651 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7652 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7653 && TREE_CODE (scalar_dest
) != MEM_REF
)
7658 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7659 if (!call
|| !gimple_call_internal_p (call
))
7662 internal_fn ifn
= gimple_call_internal_fn (call
);
7663 if (!internal_store_fn_p (ifn
))
7666 if (slp_node
!= NULL
)
7668 if (dump_enabled_p ())
7669 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7670 "SLP of masked stores not supported.\n");
7674 int mask_index
= internal_fn_mask_index (ifn
);
7676 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7677 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7681 op
= vect_get_store_rhs (stmt_info
);
7683 /* Cannot have hybrid store SLP -- that would mean storing to the
7684 same location twice. */
7685 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7687 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7688 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7692 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7693 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
7704 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7706 gcc_assert (ncopies
>= 1);
7708 /* FORNOW. This restriction should be relaxed. */
7709 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7711 if (dump_enabled_p ())
7712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7713 "multiple types in nested loop.\n");
7717 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7718 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7721 elem_type
= TREE_TYPE (vectype
);
7722 vec_mode
= TYPE_MODE (vectype
);
7724 if (!STMT_VINFO_DATA_REF (stmt_info
))
7727 vect_memory_access_type memory_access_type
;
7728 enum dr_alignment_support alignment_support_scheme
;
7731 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7732 ncopies
, &memory_access_type
, &poffset
,
7733 &alignment_support_scheme
, &misalignment
, &gs_info
))
7738 if (memory_access_type
== VMAT_CONTIGUOUS
)
7740 if (!VECTOR_MODE_P (vec_mode
)
7741 || !can_vec_mask_load_store_p (vec_mode
,
7742 TYPE_MODE (mask_vectype
), false))
7745 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7746 && (memory_access_type
!= VMAT_GATHER_SCATTER
7747 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7749 if (dump_enabled_p ())
7750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7751 "unsupported access type for masked store.\n");
7757 /* FORNOW. In some cases can vectorize even if data-type not supported
7758 (e.g. - array initialization with 0). */
7759 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7763 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7764 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7765 && memory_access_type
!= VMAT_GATHER_SCATTER
7766 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7769 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7770 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7771 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7775 first_stmt_info
= stmt_info
;
7776 first_dr_info
= dr_info
;
7777 group_size
= vec_num
= 1;
7780 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7782 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7783 memory_access_type
))
7787 if (!vec_stmt
) /* transformation not required. */
7789 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7792 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7793 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
7794 vls_type
, group_size
,
7795 memory_access_type
, &gs_info
,
7799 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7802 if (dump_enabled_p ())
7803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7804 "incompatible vector types for invariants\n");
7808 if (dump_enabled_p ()
7809 && memory_access_type
!= VMAT_ELEMENTWISE
7810 && memory_access_type
!= VMAT_GATHER_SCATTER
7811 && alignment_support_scheme
!= dr_aligned
)
7812 dump_printf_loc (MSG_NOTE
, vect_location
,
7813 "Vectorizing an unaligned access.\n");
7815 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7816 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7817 memory_access_type
, alignment_support_scheme
,
7818 misalignment
, vls_type
, slp_node
, cost_vec
);
7821 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7825 ensure_base_align (dr_info
);
7827 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7829 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7830 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7831 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7832 tree ptr
, var
, scale
, vec_mask
;
7833 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7834 tree mask_halfvectype
= mask_vectype
;
7835 edge pe
= loop_preheader_edge (loop
);
7838 enum { NARROW
, NONE
, WIDEN
} modifier
;
7839 poly_uint64 scatter_off_nunits
7840 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7842 if (known_eq (nunits
, scatter_off_nunits
))
7844 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7848 /* Currently gathers and scatters are only supported for
7849 fixed-length vectors. */
7850 unsigned int count
= scatter_off_nunits
.to_constant ();
7851 vec_perm_builder
sel (count
, count
, 1);
7852 for (i
= 0; i
< (unsigned int) count
; ++i
)
7853 sel
.quick_push (i
| (count
/ 2));
7855 vec_perm_indices
indices (sel
, 1, count
);
7856 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7858 gcc_assert (perm_mask
!= NULL_TREE
);
7860 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7864 /* Currently gathers and scatters are only supported for
7865 fixed-length vectors. */
7866 unsigned int count
= nunits
.to_constant ();
7867 vec_perm_builder
sel (count
, count
, 1);
7868 for (i
= 0; i
< (unsigned int) count
; ++i
)
7869 sel
.quick_push (i
| (count
/ 2));
7871 vec_perm_indices
indices (sel
, 2, count
);
7872 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7873 gcc_assert (perm_mask
!= NULL_TREE
);
7877 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7882 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7883 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7884 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7885 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7886 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7887 scaletype
= TREE_VALUE (arglist
);
7889 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7890 && TREE_CODE (rettype
) == VOID_TYPE
);
7892 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7893 if (!is_gimple_min_invariant (ptr
))
7895 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7896 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7897 gcc_assert (!new_bb
);
7900 if (mask
== NULL_TREE
)
7902 mask_arg
= build_int_cst (masktype
, -1);
7903 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7904 mask_arg
, masktype
, NULL
);
7907 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7909 auto_vec
<tree
> vec_oprnds0
;
7910 auto_vec
<tree
> vec_oprnds1
;
7911 auto_vec
<tree
> vec_masks
;
7914 tree mask_vectype
= truth_type_for (vectype
);
7915 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7917 ? ncopies
/ 2 : ncopies
,
7918 mask
, &vec_masks
, mask_vectype
);
7920 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7922 ? ncopies
/ 2 : ncopies
,
7923 gs_info
.offset
, &vec_oprnds0
);
7924 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7926 ? ncopies
/ 2 : ncopies
,
7928 for (j
= 0; j
< ncopies
; ++j
)
7930 if (modifier
== WIDEN
)
7933 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7934 perm_mask
, stmt_info
, gsi
);
7936 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7937 src
= vec_oprnd1
= vec_oprnds1
[j
];
7939 mask_op
= vec_mask
= vec_masks
[j
];
7941 else if (modifier
== NARROW
)
7944 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7945 perm_mask
, stmt_info
, gsi
);
7947 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7948 op
= vec_oprnd0
= vec_oprnds0
[j
];
7950 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7954 op
= vec_oprnd0
= vec_oprnds0
[j
];
7955 src
= vec_oprnd1
= vec_oprnds1
[j
];
7957 mask_op
= vec_mask
= vec_masks
[j
];
7960 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7962 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7963 TYPE_VECTOR_SUBPARTS (srctype
)));
7964 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7965 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7967 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7968 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7972 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7974 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7975 TYPE_VECTOR_SUBPARTS (idxtype
)));
7976 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7977 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7979 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7980 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7988 if (modifier
== NARROW
)
7990 var
= vect_get_new_ssa_name (mask_halfvectype
,
7993 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7994 : VEC_UNPACK_LO_EXPR
,
7996 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7999 tree optype
= TREE_TYPE (mask_arg
);
8000 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
8003 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
8004 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
8005 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
8007 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
8008 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8010 if (!useless_type_conversion_p (masktype
, utype
))
8012 gcc_assert (TYPE_PRECISION (utype
)
8013 <= TYPE_PRECISION (masktype
));
8014 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
8015 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
8016 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8022 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
8023 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8025 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8027 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8030 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8031 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8033 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8034 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
8039 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8041 /* We vectorize all the stmts of the interleaving group when we
8042 reach the last stmt in the group. */
8043 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
8044 < DR_GROUP_SIZE (first_stmt_info
)
8053 grouped_store
= false;
8054 /* VEC_NUM is the number of vect stmts to be created for this
8056 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8057 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8058 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8059 == first_stmt_info
);
8060 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8061 op
= vect_get_store_rhs (first_stmt_info
);
8064 /* VEC_NUM is the number of vect stmts to be created for this
8066 vec_num
= group_size
;
8068 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8071 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8073 if (dump_enabled_p ())
8074 dump_printf_loc (MSG_NOTE
, vect_location
,
8075 "transform store. ncopies = %d\n", ncopies
);
8077 if (memory_access_type
== VMAT_ELEMENTWISE
8078 || memory_access_type
== VMAT_STRIDED_SLP
)
8080 gimple_stmt_iterator incr_gsi
;
8086 tree stride_base
, stride_step
, alias_off
;
8090 /* Checked by get_load_store_type. */
8091 unsigned int const_nunits
= nunits
.to_constant ();
8093 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8094 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8096 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8098 = fold_build_pointer_plus
8099 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8100 size_binop (PLUS_EXPR
,
8101 convert_to_ptrofftype (dr_offset
),
8102 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8103 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
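      /* Illustration (assumed values, not part of the original comment): with
	 VF == 4 and stride == 3, one vector iteration covers the scalar
	 stores to array[j], array[j + 3], array[j + 6] and array[j + 9]; the
	 code below extracts the lanes of the vectorized rhs and emits one
	 scalar-sized store per lane.  */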
8123 unsigned nstores
= const_nunits
;
8125 tree ltype
= elem_type
;
8126 tree lvectype
= vectype
;
8129 if (group_size
< const_nunits
8130 && const_nunits
% group_size
== 0)
8132 nstores
= const_nunits
/ group_size
;
8134 ltype
= build_vector_type (elem_type
, group_size
);
8137 /* First check if vec_extract optab doesn't support extraction
8138 of vector elts directly. */
8139 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8141 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8142 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8143 group_size
).exists (&vmode
)
8144 || (convert_optab_handler (vec_extract_optab
,
8145 TYPE_MODE (vectype
), vmode
)
8146 == CODE_FOR_nothing
))
	      /* Try to avoid emitting an extract of vector elements
		 by performing the extracts using an integer type of the
		 same size, extracting from a vector of those and then
		 re-interpreting it as the original vector type if
		 supported.  */
	      unsigned lsize
		= group_size * GET_MODE_BITSIZE (elmode);
8155 unsigned int lnunits
= const_nunits
/ group_size
;
8156 /* If we can't construct such a vector fall back to
8157 element extracts from the original vector type and
8158 element size stores. */
8159 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8160 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8161 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8162 lnunits
).exists (&vmode
)
8163 && (convert_optab_handler (vec_extract_optab
,
8165 != CODE_FOR_nothing
))
8169 ltype
= build_nonstandard_integer_type (lsize
, 1);
8170 lvectype
= build_vector_type (ltype
, nstores
);
8172 /* Else fall back to vector extraction anyway.
8173 Fewer stores are more important than avoiding spilling
8174 of the vector we extract from. Compared to the
8175 construction case in vectorizable_load no store-forwarding
8176 issue exists here for reasonable archs. */
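	      /* Worked example (assumed types, not from the original source):
		 for a V8HI vectype with group_size == 2, lsize == 32, so
		 ltype becomes a 32-bit integer type and lvectype V4SI; the
		 loop below then emits four SImode extracts and stores
		 instead of eight HImode ones.  */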
8179 else if (group_size
>= const_nunits
8180 && group_size
% const_nunits
== 0)
8183 lnel
= const_nunits
;
8187 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8188 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8191 ivstep
= stride_step
;
8192 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8193 build_int_cst (TREE_TYPE (ivstep
), vf
));
8195 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8197 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8198 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8199 create_iv (stride_base
, ivstep
, NULL
,
8200 loop
, &incr_gsi
, insert_after
,
8202 incr
= gsi_stmt (incr_gsi
);
8204 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8206 alias_off
= build_int_cst (ref_type
, 0);
8207 stmt_vec_info next_stmt_info
= first_stmt_info
;
8208 for (g
= 0; g
< group_size
; g
++)
8210 running_off
= offvar
;
8213 tree size
= TYPE_SIZE_UNIT (ltype
);
8214 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8216 tree newoff
= copy_ssa_name (running_off
, NULL
);
8217 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8219 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8220 running_off
= newoff
;
8223 op
= vect_get_store_rhs (next_stmt_info
);
8224 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8226 unsigned int group_el
= 0;
8227 unsigned HOST_WIDE_INT
8228 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8229 for (j
= 0; j
< ncopies
; j
++)
8231 vec_oprnd
= vec_oprnds
[j
];
8232 /* Pun the vector to extract from if necessary. */
8233 if (lvectype
!= vectype
)
8235 tree tem
= make_ssa_name (lvectype
);
8237 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8238 lvectype
, vec_oprnd
));
8239 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8242 for (i
= 0; i
< nstores
; i
++)
8244 tree newref
, newoff
;
8245 gimple
*incr
, *assign
;
8246 tree size
= TYPE_SIZE (ltype
);
8247 /* Extract the i'th component. */
8248 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8249 bitsize_int (i
), size
);
8250 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8253 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8257 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8259 newref
= build2 (MEM_REF
, ltype
,
8260 running_off
, this_off
);
8261 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8263 /* And store it to *running_off. */
8264 assign
= gimple_build_assign (newref
, elem
);
8265 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8269 || group_el
== group_size
)
8271 newoff
= copy_ssa_name (running_off
, NULL
);
8272 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8273 running_off
, stride_step
);
8274 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8276 running_off
= newoff
;
8279 if (g
== group_size
- 1
8282 if (j
== 0 && i
== 0)
8284 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8288 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8289 vec_oprnds
.release ();
8297 auto_vec
<tree
> dr_chain (group_size
);
8298 oprnds
.create (group_size
);
8300 gcc_assert (alignment_support_scheme
);
8301 vec_loop_masks
*loop_masks
8302 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8303 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8305 vec_loop_lens
*loop_lens
8306 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8307 ? &LOOP_VINFO_LENS (loop_vinfo
)
8310 /* Shouldn't go with length-based approach if fully masked. */
8311 gcc_assert (!loop_lens
|| !loop_masks
);
8313 /* Targets with store-lane instructions must not require explicit
8314 realignment. vect_supportable_dr_alignment always returns either
8315 dr_aligned or dr_unaligned_supported for masked operations. */
8316 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8319 || alignment_support_scheme
== dr_aligned
8320 || alignment_support_scheme
== dr_unaligned_supported
);
8322 tree offset
= NULL_TREE
;
8323 if (!known_eq (poffset
, 0))
8324 offset
= size_int (poffset
);
8327 tree vec_offset
= NULL_TREE
;
8328 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8330 aggr_type
= NULL_TREE
;
8333 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8335 aggr_type
= elem_type
;
8336 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8337 &bump
, &vec_offset
);
8341 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8342 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8344 aggr_type
= vectype
;
8345 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8346 memory_access_type
);
8350 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8352 /* In case the vectorization factor (VF) is bigger than the number
8353 of elements that we can fit in a vectype (nunits), we have to generate
8354 more than one vector stmt - i.e - we need to "unroll" the
8355 vector stmt by a factor VF/nunits. */
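  /* For instance (illustration only): with VF == 8 and a 4-element vectype,
     ncopies == 2, so two vector stores are emitted per scalar store.  */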
  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
8390 auto_vec
<tree
> vec_masks
;
8391 tree vec_mask
= NULL
;
8392 auto_vec
<tree
> vec_offsets
;
8393 auto_vec
<vec
<tree
> > gvec_oprnds
;
8394 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8395 for (j
= 0; j
< ncopies
; j
++)
8402 /* Get vectorized arguments for SLP_NODE. */
8403 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8405 vec_oprnd
= vec_oprnds
[0];
8409 /* For interleaved stores we collect vectorized defs for all the
8410 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8411 used as an input to vect_permute_store_chain().
8413 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8414 and OPRNDS are of size 1. */
8415 stmt_vec_info next_stmt_info
= first_stmt_info
;
8416 for (i
= 0; i
< group_size
; i
++)
8418 /* Since gaps are not supported for interleaved stores,
8419 DR_GROUP_SIZE is the exact number of stmts in the chain.
8420 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8421 that there is no interleaving, DR_GROUP_SIZE is 1,
8422 and only one iteration of the loop will be executed. */
8423 op
= vect_get_store_rhs (next_stmt_info
);
8424 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8425 ncopies
, op
, &gvec_oprnds
[i
]);
8426 vec_oprnd
= gvec_oprnds
[i
][0];
8427 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8428 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8429 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8433 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8434 mask
, &vec_masks
, mask_vectype
);
8435 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
8440 gcc_assert (useless_type_conversion_p (vectype
,
8441 TREE_TYPE (vec_oprnd
)));
8442 bool simd_lane_access_p
8443 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8444 if (simd_lane_access_p
8446 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8447 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8448 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8449 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8450 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8451 get_alias_set (TREE_TYPE (ref_type
))))
8453 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8454 dataref_offset
= build_int_cst (ref_type
, 0);
8456 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8458 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8459 slp_node
, &gs_info
, &dataref_ptr
,
8461 vec_offset
= vec_offsets
[0];
8465 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8466 simd_lane_access_p
? loop
: NULL
,
8467 offset
, &dummy
, gsi
, &ptr_incr
,
8468 simd_lane_access_p
, bump
);
8472 /* For interleaved stores we created vectorized defs for all the
8473 defs stored in OPRNDS in the previous iteration (previous copy).
8474 DR_CHAIN is then used as an input to vect_permute_store_chain().
8475 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8476 OPRNDS are of size 1. */
8477 for (i
= 0; i
< group_size
; i
++)
8479 vec_oprnd
= gvec_oprnds
[i
][j
];
8480 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8481 oprnds
[i
] = gvec_oprnds
[i
][j
];
8484 vec_mask
= vec_masks
[j
];
8487 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8488 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8489 vec_offset
= vec_offsets
[j
];
8491 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8495 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8499 /* Get an array into which we can store the individual vectors. */
8500 vec_array
= create_vector_array (vectype
, vec_num
);
8502 /* Invalidate the current contents of VEC_ARRAY. This should
8503 become an RTL clobber too, which prevents the vector registers
8504 from being upward-exposed. */
8505 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8507 /* Store the individual vectors into the array. */
8508 for (i
= 0; i
< vec_num
; i
++)
8510 vec_oprnd
= dr_chain
[i
];
8511 write_vector_array (vinfo
, stmt_info
,
8512 gsi
, vec_oprnd
, vec_array
, i
);
8515 tree final_mask
= NULL
;
8517 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8520 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8521 final_mask
, vec_mask
, gsi
);
		  /* Emit:
		       MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
					 VEC_ARRAY).  */
8529 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8530 tree alias_ptr
= build_int_cst (ref_type
, align
);
8531 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8532 dataref_ptr
, alias_ptr
,
8533 final_mask
, vec_array
);
		  /* Emit:
		       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8539 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8540 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8542 gimple_call_set_lhs (call
, data_ref
);
8544 gimple_call_set_nothrow (call
, true);
8545 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8548 /* Record that VEC_ARRAY is now dead. */
8549 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8557 result_chain
.create (group_size
);
8559 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8560 gsi
, &result_chain
);
8563 stmt_vec_info next_stmt_info
= first_stmt_info
;
8564 for (i
= 0; i
< vec_num
; i
++)
8567 unsigned HOST_WIDE_INT align
;
8569 tree final_mask
= NULL_TREE
;
8571 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8573 vectype
, vec_num
* j
+ i
);
8575 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8576 final_mask
, vec_mask
, gsi
);
8578 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8580 tree scale
= size_int (gs_info
.scale
);
8583 call
= gimple_build_call_internal
8584 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8585 scale
, vec_oprnd
, final_mask
);
8587 call
= gimple_build_call_internal
8588 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8590 gimple_call_set_nothrow (call
, true);
8591 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8597 /* Bump the vector pointer. */
8598 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8599 gsi
, stmt_info
, bump
);
8602 vec_oprnd
= vec_oprnds
[i
];
8603 else if (grouped_store
)
8604 /* For grouped stores vectorized defs are interleaved in
8605 vect_permute_store_chain(). */
8606 vec_oprnd
= result_chain
[i
];
8608 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8609 if (alignment_support_scheme
== dr_aligned
)
8611 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8613 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8617 misalign
= misalignment
;
8618 if (dataref_offset
== NULL_TREE
8619 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8620 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8622 align
= least_bit_hwi (misalign
| align
);
8624 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8626 tree perm_mask
= perm_mask_for_reverse (vectype
);
8627 tree perm_dest
= vect_create_destination_var
8628 (vect_get_store_rhs (stmt_info
), vectype
);
8629 tree new_temp
= make_ssa_name (perm_dest
);
8631 /* Generate the permute statement. */
8633 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8634 vec_oprnd
, perm_mask
);
8635 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8637 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8638 vec_oprnd
= new_temp
;
8641 /* Arguments are ready. Create the new vector stmt. */
8644 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8646 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8648 final_mask
, vec_oprnd
);
8649 gimple_call_set_nothrow (call
, true);
8650 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8656 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8657 vec_num
* ncopies
, vec_num
* j
+ i
);
8658 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8659 machine_mode vmode
= TYPE_MODE (vectype
);
8660 opt_machine_mode new_ovmode
8661 = get_len_load_store_mode (vmode
, false);
8662 machine_mode new_vmode
= new_ovmode
.require ();
8663 /* Need conversion if it's wrapped with VnQI. */
8664 if (vmode
!= new_vmode
)
8667 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8670 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8672 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8674 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8676 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8681 signed char biasval
=
8682 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8684 tree bias
= build_int_cst (intQI_type_node
, biasval
);
8686 = gimple_build_call_internal (IFN_LEN_STORE
, 5, dataref_ptr
,
8687 ptr
, final_len
, vec_oprnd
,
8689 gimple_call_set_nothrow (call
, true);
8690 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8695 data_ref
= fold_build2 (MEM_REF
, vectype
,
8699 : build_int_cst (ref_type
, 0));
8700 if (alignment_support_scheme
== dr_aligned
)
8703 TREE_TYPE (data_ref
)
8704 = build_aligned_type (TREE_TYPE (data_ref
),
8705 align
* BITS_PER_UNIT
);
8706 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8707 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8708 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8714 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8715 if (!next_stmt_info
)
8722 *vec_stmt
= new_stmt
;
8723 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8727 for (i
= 0; i
< group_size
; ++i
)
8729 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8733 result_chain
.release ();
8734 vec_oprnds
.release ();
8739 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8740 VECTOR_CST mask. No checks are made that the target platform supports the
8741 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8742 vect_gen_perm_mask_checked. */
tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
8756 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8757 i.e. that the target supports the pattern _for arbitrary input vectors_. */
tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  machine_mode vmode = TYPE_MODE (vectype);
  gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
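
/* Illustrative sketch, not part of the original source (kept disabled): how a
   caller builds a permutation mask in the style used throughout this file.
   It assumes an 8-lane VECTYPE and produces the mask that reverses a vector
   (compare perm_mask_for_reverse); the function name is hypothetical.  */
#if 0
static tree
example_reverse_perm_mask (tree vectype)
{
  unsigned int count = 8;
  vec_perm_builder sel (count, count, 1);
  /* Select the elements in reverse order: 7, 6, ..., 0.  */
  for (unsigned int i = 0; i < count; ++i)
    sel.quick_push (count - 1 - i);
  vec_perm_indices indices (sel, 1, count);
  /* The checked variant asserts can_vec_perm_const_p, so it must only be
     used when the target is known to support the permutation.  */
  return vect_gen_perm_mask_checked (vectype, indices);
}
#endif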
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can then be moved),
   otherwise returns false.  */
8801 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8807 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8809 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8810 if (!gimple_nop_p (def_stmt
)
8811 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8813 /* Make sure we don't need to recurse. While we could do
8814 so in simple cases when there are more complex use webs
8815 we don't have an easy way to preserve stmt order to fulfil
8816 dependencies within them. */
8819 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8821 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8823 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8824 if (!gimple_nop_p (def_stmt2
)
8825 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8835 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8837 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8838 if (!gimple_nop_p (def_stmt
)
8839 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8841 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8842 gsi_remove (&gsi
, false);
8843 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
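
/* Illustrative example for hoist_defs_of_uses, not from the original source:
   for an invariant load such as

     off = ...;			/* defined before the loop  */
     for (i = 0; i < n; i++)
       {
	 p = base + off;	/* def of an SSA use of the load  */
	 x = *p;		/* the invariant load STMT_INFO  */
	 ...
       }

   the definition of P is moved onto the preheader edge so that the load
   itself can afterwards be hoisted out of the loop as well.  */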
8850 /* vectorizable_load.
   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8853 that can be vectorized.
8854 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8855 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8856 Return true if STMT_INFO is vectorizable in this way. */
8859 vectorizable_load (vec_info
*vinfo
,
8860 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8861 gimple
**vec_stmt
, slp_tree slp_node
,
8862 stmt_vector_for_cost
*cost_vec
)
8865 tree vec_dest
= NULL
;
8866 tree data_ref
= NULL
;
8867 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8868 class loop
*loop
= NULL
;
8869 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8870 bool nested_in_vect_loop
= false;
8875 tree dataref_ptr
= NULL_TREE
;
8876 tree dataref_offset
= NULL_TREE
;
8877 gimple
*ptr_incr
= NULL
;
8880 unsigned int group_size
;
8881 poly_uint64 group_gap_adj
;
8882 tree msq
= NULL_TREE
, lsq
;
8883 tree realignment_token
= NULL_TREE
;
8885 vec
<tree
> dr_chain
= vNULL
;
8886 bool grouped_load
= false;
8887 stmt_vec_info first_stmt_info
;
8888 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8889 bool compute_in_loop
= false;
8890 class loop
*at_loop
;
8892 bool slp
= (slp_node
!= NULL
);
8893 bool slp_perm
= false;
8894 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8897 gather_scatter_info gs_info
;
8899 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8901 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8904 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8908 if (!STMT_VINFO_DATA_REF (stmt_info
))
8911 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8912 int mask_index
= -1;
8913 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8915 scalar_dest
= gimple_assign_lhs (assign
);
8916 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8919 tree_code code
= gimple_assign_rhs_code (assign
);
8920 if (code
!= ARRAY_REF
8921 && code
!= BIT_FIELD_REF
8922 && code
!= INDIRECT_REF
8923 && code
!= COMPONENT_REF
8924 && code
!= IMAGPART_EXPR
8925 && code
!= REALPART_EXPR
8927 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8932 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8933 if (!call
|| !gimple_call_internal_p (call
))
8936 internal_fn ifn
= gimple_call_internal_fn (call
);
8937 if (!internal_load_fn_p (ifn
))
8940 scalar_dest
= gimple_call_lhs (call
);
8944 mask_index
= internal_fn_mask_index (ifn
);
8945 /* ??? For SLP the mask operand is always last. */
8946 if (mask_index
>= 0 && slp_node
)
8947 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
8949 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8950 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8954 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8955 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8959 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8960 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8961 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
8972 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8974 gcc_assert (ncopies
>= 1);
8976 /* FORNOW. This restriction should be relaxed. */
8977 if (nested_in_vect_loop
&& ncopies
> 1)
8979 if (dump_enabled_p ())
8980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8981 "multiple types in nested loop.\n");
8985 /* Invalidate assumptions made by dependence analysis when vectorization
8986 on the unrolled body effectively re-orders stmts. */
8988 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8989 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8990 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8992 if (dump_enabled_p ())
8993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8994 "cannot perform implicit CSE when unrolling "
8995 "with negative dependence distance\n");
8999 elem_type
= TREE_TYPE (vectype
);
9000 mode
= TYPE_MODE (vectype
);
9002 /* FORNOW. In some cases can vectorize even if data-type not supported
9003 (e.g. - data copies). */
9004 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
9006 if (dump_enabled_p ())
9007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9008 "Aligned load, but unsupported type.\n");
9012 /* Check if the load is a part of an interleaving chain. */
9013 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
9015 grouped_load
= true;
9017 gcc_assert (!nested_in_vect_loop
);
9018 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
9020 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9021 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9023 /* Refuse non-SLP vectorization of SLP-only groups. */
9024 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
9026 if (dump_enabled_p ())
9027 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9028 "cannot vectorize load in non-SLP mode.\n");
9032 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9038 /* In BB vectorization we may not actually use a loaded vector
9039 accessing elements in excess of DR_GROUP_SIZE. */
9040 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9041 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
9042 unsigned HOST_WIDE_INT nunits
;
9043 unsigned j
, k
, maxk
= 0;
9044 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
9047 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
9048 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
9049 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
9051 if (dump_enabled_p ())
9052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9053 "BB vectorization with gaps at the end of "
9054 "a load is not supported\n");
9061 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
9064 if (dump_enabled_p ())
9065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
9067 "unsupported load permutation\n");
9072 /* Invalidate assumptions made by dependence analysis when vectorization
9073 on the unrolled body effectively re-orders stmts. */
9074 if (!PURE_SLP_STMT (stmt_info
)
9075 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
9076 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
9077 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
9079 if (dump_enabled_p ())
9080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9081 "cannot perform implicit CSE when performing "
9082 "group loads with negative dependence distance\n");
9089 vect_memory_access_type memory_access_type
;
9090 enum dr_alignment_support alignment_support_scheme
;
9093 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
9094 ncopies
, &memory_access_type
, &poffset
,
9095 &alignment_support_scheme
, &misalignment
, &gs_info
))
9100 if (memory_access_type
== VMAT_CONTIGUOUS
)
9102 machine_mode vec_mode
= TYPE_MODE (vectype
);
9103 if (!VECTOR_MODE_P (vec_mode
)
9104 || !can_vec_mask_load_store_p (vec_mode
,
9105 TYPE_MODE (mask_vectype
), true))
9108 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
9109 && memory_access_type
!= VMAT_GATHER_SCATTER
)
9111 if (dump_enabled_p ())
9112 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9113 "unsupported access type for masked load.\n");
9116 else if (memory_access_type
== VMAT_GATHER_SCATTER
9117 && gs_info
.ifn
== IFN_LAST
9120 if (dump_enabled_p ())
9121 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9122 "unsupported masked emulated gather.\n");
9127 if (!vec_stmt
) /* transformation not required. */
9131 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
9134 if (dump_enabled_p ())
9135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9136 "incompatible vector types for invariants\n");
9141 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
9144 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
9145 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
9146 VLS_LOAD
, group_size
,
9147 memory_access_type
, &gs_info
,
9150 if (dump_enabled_p ()
9151 && memory_access_type
!= VMAT_ELEMENTWISE
9152 && memory_access_type
!= VMAT_GATHER_SCATTER
9153 && alignment_support_scheme
!= dr_aligned
)
9154 dump_printf_loc (MSG_NOTE
, vect_location
,
9155 "Vectorizing an unaligned access.\n");
9157 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9158 vinfo
->any_known_not_updated_vssa
= true;
9160 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
9161 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
9162 alignment_support_scheme
, misalignment
,
9163 &gs_info
, slp_node
, cost_vec
);
9168 gcc_assert (memory_access_type
9169 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
9171 if (dump_enabled_p ())
9172 dump_printf_loc (MSG_NOTE
, vect_location
,
9173 "transform load. ncopies = %d\n", ncopies
);
9177 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
9178 ensure_base_align (dr_info
);
9180 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
9182 vect_build_gather_load_calls (vinfo
,
9183 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
9187 if (memory_access_type
== VMAT_INVARIANT
)
9189 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
9190 /* If we have versioned for aliasing or the loop doesn't
9191 have any data dependencies that would preclude this,
9192 then we are sure this is a loop invariant load and
9193 thus we can insert it on the preheader edge. */
9194 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
9195 && !nested_in_vect_loop
9196 && hoist_defs_of_uses (stmt_info
, loop
));
9199 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
9200 if (dump_enabled_p ())
9201 dump_printf_loc (MSG_NOTE
, vect_location
,
9202 "hoisting out of the vectorized loop: %G",
9204 scalar_dest
= copy_ssa_name (scalar_dest
);
9205 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
9206 edge pe
= loop_preheader_edge (loop
);
9207 gphi
*vphi
= get_virtual_phi (loop
->header
);
9210 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
9212 vuse
= gimple_vuse (gsi_stmt (*gsi
));
9213 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
9214 gimple_set_vuse (new_stmt
, vuse
);
9215 gsi_insert_on_edge_immediate (pe
, new_stmt
);
9217 /* These copies are all equivalent, but currently the representation
9218 requires a separate STMT_VINFO_VEC_STMT for each one. */
9219 gimple_stmt_iterator gsi2
= *gsi
;
9221 for (j
= 0; j
< ncopies
; j
++)
9224 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9227 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9229 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9231 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9235 *vec_stmt
= new_stmt
;
9236 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9242 if (memory_access_type
== VMAT_ELEMENTWISE
9243 || memory_access_type
== VMAT_STRIDED_SLP
)
9245 gimple_stmt_iterator incr_gsi
;
9250 vec
<constructor_elt
, va_gc
> *v
= NULL
;
9251 tree stride_base
, stride_step
, alias_off
;
9252 /* Checked by get_load_store_type. */
9253 unsigned int const_nunits
= nunits
.to_constant ();
9254 unsigned HOST_WIDE_INT cst_offset
= 0;
9257 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
9258 gcc_assert (!nested_in_vect_loop
);
9262 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9263 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9267 first_stmt_info
= stmt_info
;
9268 first_dr_info
= dr_info
;
9270 if (slp
&& grouped_load
)
9272 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9273 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9279 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9280 * vect_get_place_in_interleaving_chain (stmt_info
,
9283 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9286 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9288 = fold_build_pointer_plus
9289 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9290 size_binop (PLUS_EXPR
,
9291 convert_to_ptrofftype (dr_offset
),
9292 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9293 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...
       */
9311 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9312 build_int_cst (TREE_TYPE (stride_step
), vf
));
9314 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9316 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9317 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9318 create_iv (stride_base
, ivstep
, NULL
,
9319 loop
, &incr_gsi
, insert_after
,
9322 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9324 running_off
= offvar
;
9325 alias_off
= build_int_cst (ref_type
, 0);
9326 int nloads
= const_nunits
;
9328 tree ltype
= TREE_TYPE (vectype
);
9329 tree lvectype
= vectype
;
9330 auto_vec
<tree
> dr_chain
;
9331 if (memory_access_type
== VMAT_STRIDED_SLP
)
9333 if (group_size
< const_nunits
)
9335 /* First check if vec_init optab supports construction from vector
9336 elts directly. Otherwise avoid emitting a constructor of
9337 vector elements by performing the loads using an integer type
9338 of the same size, constructing a vector of those and then
9339 re-interpreting it as the original vector type. This avoids a
9340 huge runtime penalty due to the general inability to perform
9341 store forwarding from smaller stores to a larger load. */
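	  /* Worked example (assumed types, not from the original source):
	     for a V8HI vectype with group_size == 2, the code below asks for
	     a composition type made of 8/2 == 4 parts; if such a type (say
	     V4SI) is available, four 32-bit loads covering one group each
	     are emitted, assembled into that vector, and the result is
	     VIEW_CONVERTed back to V8HI.  */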
9344 = vector_vector_composition_type (vectype
,
9345 const_nunits
/ group_size
,
9347 if (vtype
!= NULL_TREE
)
9349 nloads
= const_nunits
/ group_size
;
9358 lnel
= const_nunits
;
9361 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9363 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9364 else if (nloads
== 1)
9369 /* For SLP permutation support we need to load the whole group,
9370 not only the number of vector stmts the permutation result
9374 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9376 unsigned int const_vf
= vf
.to_constant ();
9377 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9378 dr_chain
.create (ncopies
);
9381 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9383 unsigned int group_el
= 0;
9384 unsigned HOST_WIDE_INT
9385 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9386 unsigned int n_groups
= 0;
9387 for (j
= 0; j
< ncopies
; j
++)
9390 vec_alloc (v
, nloads
);
9391 gimple
*new_stmt
= NULL
;
9392 for (i
= 0; i
< nloads
; i
++)
9394 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9395 group_el
* elsz
+ cst_offset
);
9396 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9397 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9398 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9399 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9401 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9402 gimple_assign_lhs (new_stmt
));
9406 || group_el
== group_size
)
9409 /* When doing SLP make sure to not load elements from
9410 the next vector iteration, those will not be accessed
9411 so just use the last element again. See PR107451. */
9412 if (!slp
|| known_lt (n_groups
, vf
))
9414 tree newoff
= copy_ssa_name (running_off
);
9416 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9417 running_off
, stride_step
);
9418 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9419 running_off
= newoff
;
9426 tree vec_inv
= build_constructor (lvectype
, v
);
9427 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9428 vec_inv
, lvectype
, gsi
);
9429 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9430 if (lvectype
!= vectype
)
9432 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9434 build1 (VIEW_CONVERT_EXPR
,
9435 vectype
, new_temp
));
9436 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9443 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9445 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9450 *vec_stmt
= new_stmt
;
9451 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9457 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9463 if (memory_access_type
== VMAT_GATHER_SCATTER
9464 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9465 grouped_load
= false;
9469 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9470 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9471 /* For SLP vectorization we directly vectorize a subchain
9472 without permutation. */
9473 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9474 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9475 /* For BB vectorization always use the first stmt to base
9476 the data ref pointer on. */
9478 first_stmt_info_for_drptr
9479 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9481 /* Check if the chain of loads is already vectorized. */
9482 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9483 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9484 ??? But we can only do so if there is exactly one
	 as we have no way to get at the rest.  Leave the CSE
	 opportunity alone.
9487 ??? With the group load eventually participating
9488 in multiple different permutations (having multiple
9489 slp nodes which refer to the same group) the CSE
9490 is even wrong code. See PR56270. */
9493 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9496 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9499 /* VEC_NUM is the number of vect stmts to be created for this group. */
9502 grouped_load
= false;
9503 /* If an SLP permutation is from N elements to N elements,
9504 and if one vector holds a whole number of N, we can load
9505 the inputs to the permutation in the same way as an
9506 unpermuted sequence. In other cases we need to load the
9507 whole group, not only the number of vector stmts the
9508 permutation result fits in. */
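	  /* Numeric illustration (assumed sizes, not from the original
	     comment): with group_size == 4, scalar_lanes == 4 and a V8SI
	     vectype, each vector holds exactly two whole groups, so the
	     loads can be generated as for an unpermuted access; with
	     group_size == 3 and V8SI the multiple_p test below fails and
	     the whole group has to be loaded instead.  */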
9509 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9511 && (group_size
!= scalar_lanes
9512 || !multiple_p (nunits
, group_size
)))
9514 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9515 variable VF; see vect_transform_slp_perm_load. */
9516 unsigned int const_vf
= vf
.to_constant ();
9517 unsigned int const_nunits
= nunits
.to_constant ();
9518 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9519 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9523 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9525 = group_size
- scalar_lanes
;
9529 vec_num
= group_size
;
9531 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9535 first_stmt_info
= stmt_info
;
9536 first_dr_info
= dr_info
;
9537 group_size
= vec_num
= 1;
9539 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9541 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9544 gcc_assert (alignment_support_scheme
);
9545 vec_loop_masks
*loop_masks
9546 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9547 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9549 vec_loop_lens
*loop_lens
9550 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9551 ? &LOOP_VINFO_LENS (loop_vinfo
)
9554 /* Shouldn't go with length-based approach if fully masked. */
9555 gcc_assert (!loop_lens
|| !loop_masks
);
9557 /* Targets with store-lane instructions must not require explicit
9558 realignment. vect_supportable_dr_alignment always returns either
9559 dr_aligned or dr_unaligned_supported for masked operations. */
9560 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9563 || alignment_support_scheme
== dr_aligned
9564 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = p + indx * vectype_size;

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       p2 = p2 + indx * vectype_size
       vec_dest = realign_load (msq, lsq, realignment_token)   */
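  /* The floor(p) operation above amounts to masking off the low address
     bits; e.g. (illustratively) with a 16-byte target alignment the
     realigned pointer is computed as

       ptr = dataref_ptr & -16;

     which is what the BIT_AND_EXPR statements in the dr_explicit_realign
     and dr_explicit_realign_optimized cases below generate.  */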
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
  if (nested_in_vect_loop
      && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
		      GET_MODE_SIZE (TYPE_MODE (vectype))))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }
  bool diff_first_stmt_info
    = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;

  tree offset = NULL_TREE;
  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      /* If we have different first_stmt_info, we can't set up realignment
	 here, since we can't guarantee first_stmt_info DR has been
	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
	 distance from first_stmt_info DR instead as below.  */
9686 if (!diff_first_stmt_info
)
9687 msq
= vect_setup_realignment (vinfo
,
9688 first_stmt_info
, gsi
, &realignment_token
,
9689 alignment_support_scheme
, NULL_TREE
,
9691 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9693 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9694 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9696 gcc_assert (!first_stmt_info_for_drptr
);
9702 if (!known_eq (poffset
, 0))
9704 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9705 : size_int (poffset
));
9708 tree vec_offset
= NULL_TREE
;
9709 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9711 aggr_type
= NULL_TREE
;
9714 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9716 aggr_type
= elem_type
;
9717 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9718 &bump
, &vec_offset
);
9722 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9723 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9725 aggr_type
= vectype
;
9726 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9727 memory_access_type
);
9730 auto_vec
<tree
> vec_offsets
;
9731 auto_vec
<tree
> vec_masks
;
9735 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9738 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9739 &vec_masks
, mask_vectype
);
9741 tree vec_mask
= NULL_TREE
;
9742 poly_uint64 group_elt
= 0;
9743 for (j
= 0; j
< ncopies
; j
++)
9745 /* 1. Create the vector or array pointer update chain. */
9748 bool simd_lane_access_p
9749 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9750 if (simd_lane_access_p
9751 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9752 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9753 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9754 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9755 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9756 get_alias_set (TREE_TYPE (ref_type
)))
9757 && (alignment_support_scheme
== dr_aligned
9758 || alignment_support_scheme
== dr_unaligned_supported
))
9760 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9761 dataref_offset
= build_int_cst (ref_type
, 0);
9763 else if (diff_first_stmt_info
)
9766 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9767 aggr_type
, at_loop
, offset
, &dummy
,
9768 gsi
, &ptr_incr
, simd_lane_access_p
,
9770 /* Adjust the pointer by the difference to first_stmt. */
9771 data_reference_p ptrdr
9772 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9774 = fold_convert (sizetype
,
9775 size_binop (MINUS_EXPR
,
9776 DR_INIT (first_dr_info
->dr
),
9778 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9780 if (alignment_support_scheme
== dr_explicit_realign
)
9782 msq
= vect_setup_realignment (vinfo
,
9783 first_stmt_info_for_drptr
, gsi
,
9785 alignment_support_scheme
,
9786 dataref_ptr
, &at_loop
);
9787 gcc_assert (!compute_in_loop
);
9790 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9792 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9793 slp_node
, &gs_info
, &dataref_ptr
,
9798 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9800 offset
, &dummy
, gsi
, &ptr_incr
,
9801 simd_lane_access_p
, bump
);
9803 vec_mask
= vec_masks
[0];
9808 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9810 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9811 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9814 vec_mask
= vec_masks
[j
];
9817 if (grouped_load
|| slp_perm
)
9818 dr_chain
.create (vec_num
);
9820 gimple
*new_stmt
= NULL
;
9821 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9825 vec_array
= create_vector_array (vectype
, vec_num
);
9827 tree final_mask
= NULL_TREE
;
9829 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9832 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9833 final_mask
, vec_mask
, gsi
);
9839 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9841 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9842 tree alias_ptr
= build_int_cst (ref_type
, align
);
9843 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9844 dataref_ptr
, alias_ptr
,
9850 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9851 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9852 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9854 gimple_call_set_lhs (call
, vec_array
);
9855 gimple_call_set_nothrow (call
, true);
9856 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9859 /* Extract each vector into an SSA_NAME. */
9860 for (i
= 0; i
< vec_num
; i
++)
9862 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9864 dr_chain
.quick_push (new_temp
);
9867 /* Record the mapping between SSA_NAMEs and statements. */
9868 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9870 /* Record that VEC_ARRAY is now dead. */
9871 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9875 for (i
= 0; i
< vec_num
; i
++)
9877 tree final_mask
= NULL_TREE
;
9879 && memory_access_type
!= VMAT_INVARIANT
)
9880 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9882 vectype
, vec_num
* j
+ i
);
9884 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9885 final_mask
, vec_mask
, gsi
);
9887 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9888 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9889 gsi
, stmt_info
, bump
);
9891 /* 2. Create the vector-load in the loop. */
9892 switch (alignment_support_scheme
)
9895 case dr_unaligned_supported
:
9897 unsigned int misalign
;
9898 unsigned HOST_WIDE_INT align
;
9900 if (memory_access_type
== VMAT_GATHER_SCATTER
9901 && gs_info
.ifn
!= IFN_LAST
)
9903 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9904 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9905 tree zero
= build_zero_cst (vectype
);
9906 tree scale
= size_int (gs_info
.scale
);
9909 call
= gimple_build_call_internal
9910 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9911 vec_offset
, scale
, zero
, final_mask
);
9913 call
= gimple_build_call_internal
9914 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9915 vec_offset
, scale
, zero
);
9916 gimple_call_set_nothrow (call
, true);
9918 data_ref
= NULL_TREE
;
9921 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9923 /* Emulated gather-scatter. */
9924 gcc_assert (!final_mask
);
9925 unsigned HOST_WIDE_INT const_nunits
9926 = nunits
.to_constant ();
9927 unsigned HOST_WIDE_INT const_offset_nunits
9928 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9930 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9931 vec_alloc (ctor_elts
, const_nunits
);
9932 gimple_seq stmts
= NULL
;
9933 /* We support offset vectors with more elements
9934 than the data vector for now. */
9935 unsigned HOST_WIDE_INT factor
9936 = const_offset_nunits
/ const_nunits
;
9937 vec_offset
= vec_offsets
[j
/ factor
];
9938 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9939 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9940 tree scale
= size_int (gs_info
.scale
);
9942 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9943 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9945 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9947 tree boff
= size_binop (MULT_EXPR
,
9948 TYPE_SIZE (idx_type
),
9951 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9952 idx_type
, vec_offset
,
9953 TYPE_SIZE (idx_type
),
9955 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9956 idx
= gimple_build (&stmts
, MULT_EXPR
,
9957 sizetype
, idx
, scale
);
9958 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9959 TREE_TYPE (dataref_ptr
),
9961 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9962 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9963 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9964 build_int_cst (ref_type
, 0));
9965 new_stmt
= gimple_build_assign (elt
, ref
);
9966 gimple_set_vuse (new_stmt
,
9967 gimple_vuse (gsi_stmt (*gsi
)));
9968 gimple_seq_add_stmt (&stmts
, new_stmt
);
9969 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9971 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9972 new_stmt
= gimple_build_assign (NULL_TREE
,
9974 (vectype
, ctor_elts
));
9975 data_ref
= NULL_TREE
;
9980 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9981 if (alignment_support_scheme
== dr_aligned
)
9983 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9985 align
= dr_alignment
9986 (vect_dr_behavior (vinfo
, first_dr_info
));
9990 misalign
= misalignment
;
9991 if (dataref_offset
== NULL_TREE
9992 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9993 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9995 align
= least_bit_hwi (misalign
| align
);
9999 tree ptr
= build_int_cst (ref_type
,
10000 align
* BITS_PER_UNIT
);
10002 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
10005 gimple_call_set_nothrow (call
, true);
10007 data_ref
= NULL_TREE
;
10009 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
10012 = vect_get_loop_len (loop_vinfo
, loop_lens
,
10015 tree ptr
= build_int_cst (ref_type
,
10016 align
* BITS_PER_UNIT
);
10018 machine_mode vmode
= TYPE_MODE (vectype
);
10019 opt_machine_mode new_ovmode
10020 = get_len_load_store_mode (vmode
, true);
10021 machine_mode new_vmode
= new_ovmode
.require ();
10022 tree qi_type
= unsigned_intQI_type_node
;
10024 signed char biasval
=
10025 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10027 tree bias
= build_int_cst (intQI_type_node
, biasval
);
10030 = gimple_build_call_internal (IFN_LEN_LOAD
, 4,
10033 gimple_call_set_nothrow (call
, true);
10035 data_ref
= NULL_TREE
;
10037 /* Need conversion if it's wrapped with VnQI. */
10038 if (vmode
!= new_vmode
)
10041 = build_vector_type_for_mode (qi_type
, new_vmode
);
10042 tree var
= vect_get_new_ssa_name (new_vtype
,
10044 gimple_set_lhs (call
, var
);
10045 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
10047 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
10049 = gimple_build_assign (vec_dest
,
10050 VIEW_CONVERT_EXPR
, op
);
10055 tree ltype
= vectype
;
10056 tree new_vtype
= NULL_TREE
;
10057 unsigned HOST_WIDE_INT gap
10058 = DR_GROUP_GAP (first_stmt_info
);
10059 unsigned int vect_align
10060 = vect_known_alignment_in_bytes (first_dr_info
,
10062 unsigned int scalar_dr_size
10063 = vect_get_scalar_dr_size (first_dr_info
);
		    /* If there's no peeling for gaps but we have a gap
		       with slp loads then load the lower half of the
		       vector only.  See get_group_load_store_type for
		       when we apply this optimization.  */
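		    /* As an illustrative (hypothetical) example: for a group
		       of group_size == 4 ints with gap == 2 and nunits == 4,
		       only the first two elements are actually accessed, so
		       the code below loads a half-size vector (ltype) and
		       widens it back to VECTYPE via a CONSTRUCTOR, padding
		       the unused half with zeros, instead of issuing a full
		       vector load that would touch the gap.  */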
10070 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
10072 && known_eq (nunits
, (group_size
- gap
) * 2)
10073 && known_eq (nunits
, group_size
)
10074 && gap
>= (vect_align
/ scalar_dr_size
))
10078 = vector_vector_composition_type (vectype
, 2,
10080 if (new_vtype
!= NULL_TREE
)
10081 ltype
= half_vtype
;
10084 = (dataref_offset
? dataref_offset
10085 : build_int_cst (ref_type
, 0));
10086 if (ltype
!= vectype
10087 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10089 unsigned HOST_WIDE_INT gap_offset
10090 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
10091 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
10092 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
10095 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
10096 if (alignment_support_scheme
== dr_aligned
)
10099 TREE_TYPE (data_ref
)
10100 = build_aligned_type (TREE_TYPE (data_ref
),
10101 align
* BITS_PER_UNIT
);
10102 if (ltype
!= vectype
)
10104 vect_copy_ref_info (data_ref
,
10105 DR_REF (first_dr_info
->dr
));
10106 tree tem
= make_ssa_name (ltype
);
10107 new_stmt
= gimple_build_assign (tem
, data_ref
);
10108 vect_finish_stmt_generation (vinfo
, stmt_info
,
10111 vec
<constructor_elt
, va_gc
> *v
;
10113 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10115 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10116 build_zero_cst (ltype
));
10117 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
10121 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
10122 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10123 build_zero_cst (ltype
));
10125 gcc_assert (new_vtype
!= NULL_TREE
);
10126 if (new_vtype
== vectype
)
10127 new_stmt
= gimple_build_assign (
10128 vec_dest
, build_constructor (vectype
, v
));
10131 tree new_vname
= make_ssa_name (new_vtype
);
10132 new_stmt
= gimple_build_assign (
10133 new_vname
, build_constructor (new_vtype
, v
));
10134 vect_finish_stmt_generation (vinfo
, stmt_info
,
10136 new_stmt
= gimple_build_assign (
10137 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
10144 case dr_explicit_realign
:
10148 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10150 if (compute_in_loop
)
10151 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10152 &realignment_token
,
10153 dr_explicit_realign
,
10154 dataref_ptr
, NULL
);
10156 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10157 ptr
= copy_ssa_name (dataref_ptr
);
10159 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10160 // For explicit realign the target alignment should be
10161 // known at compile time.
10162 unsigned HOST_WIDE_INT align
=
10163 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10164 new_stmt
= gimple_build_assign
10165 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
10167 (TREE_TYPE (dataref_ptr
),
10168 -(HOST_WIDE_INT
) align
));
10169 vect_finish_stmt_generation (vinfo
, stmt_info
,
10172 = build2 (MEM_REF
, vectype
, ptr
,
10173 build_int_cst (ref_type
, 0));
10174 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10175 vec_dest
= vect_create_destination_var (scalar_dest
,
10177 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10178 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10179 gimple_assign_set_lhs (new_stmt
, new_temp
);
10180 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
10181 vect_finish_stmt_generation (vinfo
, stmt_info
,
10185 bump
= size_binop (MULT_EXPR
, vs
,
10186 TYPE_SIZE_UNIT (elem_type
));
10187 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
10188 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
10190 new_stmt
= gimple_build_assign
10191 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
10193 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
10194 if (TREE_CODE (ptr
) == SSA_NAME
)
10195 ptr
= copy_ssa_name (ptr
, new_stmt
);
10197 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
10198 gimple_assign_set_lhs (new_stmt
, ptr
);
10199 vect_finish_stmt_generation (vinfo
, stmt_info
,
10202 = build2 (MEM_REF
, vectype
, ptr
,
10203 build_int_cst (ref_type
, 0));
10206 case dr_explicit_realign_optimized
:
10208 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10209 new_temp
= copy_ssa_name (dataref_ptr
);
10211 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10212 // We should only be doing this if we know the target
10213 // alignment at compile time.
10214 unsigned HOST_WIDE_INT align
=
10215 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10216 new_stmt
= gimple_build_assign
10217 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
10218 build_int_cst (TREE_TYPE (dataref_ptr
),
10219 -(HOST_WIDE_INT
) align
));
10220 vect_finish_stmt_generation (vinfo
, stmt_info
,
10223 = build2 (MEM_REF
, vectype
, new_temp
,
10224 build_int_cst (ref_type
, 0));
10228 gcc_unreachable ();
10230 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10231 /* DATA_REF is null if we've already built the statement. */
10234 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10235 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10237 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10238 gimple_set_lhs (new_stmt
, new_temp
);
10239 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10241 /* 3. Handle explicit realignment if necessary/supported.
10243 vec_dest = realign_load (msq, lsq, realignment_token) */
10244 if (alignment_support_scheme
== dr_explicit_realign_optimized
10245 || alignment_support_scheme
== dr_explicit_realign
)
10247 lsq
= gimple_assign_lhs (new_stmt
);
10248 if (!realignment_token
)
10249 realignment_token
= dataref_ptr
;
10250 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10251 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
10252 msq
, lsq
, realignment_token
);
10253 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10254 gimple_assign_set_lhs (new_stmt
, new_temp
);
10255 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10257 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10260 if (i
== vec_num
- 1 && j
== ncopies
- 1)
10261 add_phi_arg (phi
, lsq
,
10262 loop_latch_edge (containing_loop
),
10268 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10270 tree perm_mask
= perm_mask_for_reverse (vectype
);
10271 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
10272 perm_mask
, stmt_info
, gsi
);
10273 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10276 /* Collect vector loads and later create their permutation in
10277 vect_transform_grouped_load (). */
10278 if (grouped_load
|| slp_perm
)
10279 dr_chain
.quick_push (new_temp
);
10281 /* Store vector loads in the corresponding SLP_NODE. */
10282 if (slp
&& !slp_perm
)
10283 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
	  /* With SLP permutation we load the gaps as well; without it
	     we need to skip the gaps after we manage to fully load
	     all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10288 group_elt
+= nunits
;
10289 if (maybe_ne (group_gap_adj
, 0U)
10291 && known_eq (group_elt
, group_size
- group_gap_adj
))
10293 poly_wide_int bump_val
10294 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10296 if (tree_int_cst_sgn
10297 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10298 bump_val
= -bump_val
;
10299 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10300 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10301 gsi
, stmt_info
, bump
);
10305 /* Bump the vector pointer to account for a gap or for excess
10306 elements loaded for a permuted SLP load. */
10307 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10309 poly_wide_int bump_val
10310 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10312 if (tree_int_cst_sgn
10313 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10314 bump_val
= -bump_val
;
10315 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10316 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10321 if (slp
&& !slp_perm
)
	  /* For SLP we know we've seen all possible uses of dr_chain so
	     direct vect_transform_slp_perm_load to DCE the unused parts.
	     ???  This is a hack to prevent compile-time issues as seen
	     in PR101120 and friends.  */
10331 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10332 gsi
, vf
, false, &n_perms
,
10340 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10341 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10343 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10347 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10350 dr_chain
.release ();
10353 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
10372 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10373 slp_tree slp_node
, tree
*comp_vectype
,
10374 enum vect_def_type
*dts
, tree vectype
)
10377 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10381 if (TREE_CODE (cond
) == SSA_NAME
10382 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10384 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10385 &slp_op
, &dts
[0], comp_vectype
)
10387 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10392 if (!COMPARISON_CLASS_P (cond
))
10395 lhs
= TREE_OPERAND (cond
, 0);
10396 rhs
= TREE_OPERAND (cond
, 1);
10398 if (TREE_CODE (lhs
) == SSA_NAME
)
10400 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10401 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10404 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10405 || TREE_CODE (lhs
) == FIXED_CST
)
10406 dts
[0] = vect_constant_def
;
10410 if (TREE_CODE (rhs
) == SSA_NAME
)
10412 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10413 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10416 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10417 || TREE_CODE (rhs
) == FIXED_CST
)
10418 dts
[1] = vect_constant_def
;
10422 if (vectype1
&& vectype2
10423 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10424 TYPE_VECTOR_SUBPARTS (vectype2
)))
10427 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10428 /* Invariant comparison. */
10429 if (! *comp_vectype
)
10431 tree scalar_type
= TREE_TYPE (lhs
);
10432 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10433 *comp_vectype
= truth_type_for (vectype
);
10436 /* If we can widen the comparison to match vectype do so. */
10437 if (INTEGRAL_TYPE_P (scalar_type
)
10439 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10440 TYPE_SIZE (TREE_TYPE (vectype
))))
10441 scalar_type
= build_nonstandard_integer_type
10442 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10443 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
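/* For example (illustrative only), a scalar statement

     x = a < b ? c : d;

   is vectorized as

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where the comparison may instead be emitted as a separate mask
   statement when a loop mask or an EXTRACT_LAST reduction is involved.  */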
10463 vectorizable_condition (vec_info
*vinfo
,
10464 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10466 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10468 tree scalar_dest
= NULL_TREE
;
10469 tree vec_dest
= NULL_TREE
;
10470 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10471 tree then_clause
, else_clause
;
10472 tree comp_vectype
= NULL_TREE
;
10473 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10474 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10477 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10478 enum vect_def_type dts
[4]
10479 = {vect_unknown_def_type
, vect_unknown_def_type
,
10480 vect_unknown_def_type
, vect_unknown_def_type
};
10484 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10486 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10487 vec
<tree
> vec_oprnds0
= vNULL
;
10488 vec
<tree
> vec_oprnds1
= vNULL
;
10489 vec
<tree
> vec_oprnds2
= vNULL
;
10490 vec
<tree
> vec_oprnds3
= vNULL
;
10492 bool masked
= false;
10494 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10497 /* Is vectorizable conditional operation? */
10498 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10502 code
= gimple_assign_rhs_code (stmt
);
10503 if (code
!= COND_EXPR
)
10506 stmt_vec_info reduc_info
= NULL
;
10507 int reduc_index
= -1;
10508 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10510 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10515 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10516 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10517 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10518 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10519 || reduc_index
!= -1);
10523 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10527 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10528 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10533 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10537 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10541 gcc_assert (ncopies
>= 1);
10542 if (for_reduction
&& ncopies
> 1)
10543 return false; /* FORNOW */
10545 cond_expr
= gimple_assign_rhs1 (stmt
);
10547 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10548 &comp_vectype
, &dts
[0], vectype
)
10552 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10553 slp_tree then_slp_node
, else_slp_node
;
10554 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10555 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10557 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10558 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10561 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10564 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10567 masked
= !COMPARISON_CLASS_P (cond_expr
);
10568 vec_cmp_type
= truth_type_for (comp_vectype
);
10570 if (vec_cmp_type
== NULL_TREE
)
10573 cond_code
= TREE_CODE (cond_expr
);
10576 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10577 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
  /* For conditional reductions, the "then" value needs to be the candidate
     value calculated by this iteration while the "else" value needs to be
     the result carried over from previous iterations.  If the COND_EXPR
     is the other way around, we need to swap it.  */
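  /* E.g. (illustrative only) a conditional reduction written as

       res = a[i] < x ? res : a[i];

     carries the previous result in the "then" slot, so the comparison is
     inverted (or, failing that, the computed mask is inverted) and the
     then/else operands are swapped below.  */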
10584 bool must_invert_cmp_result
= false;
10585 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10588 must_invert_cmp_result
= true;
10591 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10592 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10593 if (new_code
== ERROR_MARK
)
10594 must_invert_cmp_result
= true;
10597 cond_code
= new_code
;
10598 /* Make sure we don't accidentally use the old condition. */
10599 cond_expr
= NULL_TREE
;
10602 std::swap (then_clause
, else_clause
);
10605 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
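      /* For instance (illustrative only), with boolean operands a and b:
	   a >  b   ->  a & ~b    (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
	   a != b   ->  a ^ b     (bitop1 = BIT_XOR_EXPR)
	   a == b   ->  ~(a ^ b)  (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)  */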
10616 bitop1
= BIT_NOT_EXPR
;
10617 bitop2
= BIT_AND_EXPR
;
10620 bitop1
= BIT_NOT_EXPR
;
10621 bitop2
= BIT_IOR_EXPR
;
10624 bitop1
= BIT_NOT_EXPR
;
10625 bitop2
= BIT_AND_EXPR
;
10626 std::swap (cond_expr0
, cond_expr1
);
10629 bitop1
= BIT_NOT_EXPR
;
10630 bitop2
= BIT_IOR_EXPR
;
10631 std::swap (cond_expr0
, cond_expr1
);
10634 bitop1
= BIT_XOR_EXPR
;
10637 bitop1
= BIT_XOR_EXPR
;
10638 bitop2
= BIT_NOT_EXPR
;
10643 cond_code
= SSA_NAME
;
10646 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10647 && reduction_type
== EXTRACT_LAST_REDUCTION
10648 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10650 if (dump_enabled_p ())
10651 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10652 "reduction comparison operation not supported.\n");
10658 if (bitop1
!= NOP_EXPR
)
10660 machine_mode mode
= TYPE_MODE (comp_vectype
);
10663 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10664 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10667 if (bitop2
!= NOP_EXPR
)
10669 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10671 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10676 vect_cost_for_stmt kind
= vector_stmt
;
10677 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10678 /* Count one reduction-like operation per vector. */
10679 kind
= vec_to_scalar
;
10680 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10684 && (!vect_maybe_update_slp_op_vectype
10685 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10687 && !vect_maybe_update_slp_op_vectype
10688 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10689 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10690 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10692 if (dump_enabled_p ())
10693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10694 "incompatible vector types for invariants\n");
10698 if (loop_vinfo
&& for_reduction
10699 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10701 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10702 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10703 ncopies
* vec_num
, vectype
, NULL
);
10704 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10705 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10707 if (dump_enabled_p ())
10708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10709 "conditional reduction prevents the use"
10710 " of partial vectors.\n");
10711 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10715 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10716 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10724 scalar_dest
= gimple_assign_lhs (stmt
);
10725 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10726 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10728 bool swap_cond_operands
= false;
10730 /* See whether another part of the vectorized code applies a loop
10731 mask to the condition, or to its inverse. */
10733 vec_loop_masks
*masks
= NULL
;
10734 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10736 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10737 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10740 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10741 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10742 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10745 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10746 tree_code orig_code
= cond
.code
;
10747 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10748 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10750 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10751 cond_code
= cond
.code
;
10752 swap_cond_operands
= true;
	  /* Try the inverse of the current mask.  We check if the
	     inverse mask is live and if so we generate a negate of
	     the current mask such that we still honor NaNs.  */
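	  /* E.g. (illustrative only) if another statement in the loop is
	     masked on the inverse of this condition, reuse that mask:
	     swap the then/else operands and negate the computed mask
	     rather than inverting the comparison itself, so that NaN
	     inputs are still handled correctly.  */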
10759 cond
.inverted_p
= true;
10760 cond
.code
= orig_code
;
10761 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10763 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10764 cond_code
= cond
.code
;
10765 swap_cond_operands
= true;
10766 must_invert_cmp_result
= true;
10773 /* Handle cond expr. */
10775 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10776 cond_expr
, &vec_oprnds0
, comp_vectype
,
10777 then_clause
, &vec_oprnds2
, vectype
,
10778 reduction_type
!= EXTRACT_LAST_REDUCTION
10779 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10781 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10782 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10783 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10784 then_clause
, &vec_oprnds2
, vectype
,
10785 reduction_type
!= EXTRACT_LAST_REDUCTION
10786 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10788 /* Arguments are ready. Create the new vector stmt. */
10789 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10791 vec_then_clause
= vec_oprnds2
[i
];
10792 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10793 vec_else_clause
= vec_oprnds3
[i
];
10795 if (swap_cond_operands
)
10796 std::swap (vec_then_clause
, vec_else_clause
);
10799 vec_compare
= vec_cond_lhs
;
10802 vec_cond_rhs
= vec_oprnds1
[i
];
10803 if (bitop1
== NOP_EXPR
)
10805 gimple_seq stmts
= NULL
;
10806 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10807 vec_cond_lhs
, vec_cond_rhs
);
10808 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10812 new_temp
= make_ssa_name (vec_cmp_type
);
10814 if (bitop1
== BIT_NOT_EXPR
)
10815 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10819 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10821 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10822 if (bitop2
== NOP_EXPR
)
10823 vec_compare
= new_temp
;
10824 else if (bitop2
== BIT_NOT_EXPR
10825 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
10827 /* Instead of doing ~x ? y : z do x ? z : y. */
10828 vec_compare
= new_temp
;
10829 std::swap (vec_then_clause
, vec_else_clause
);
10833 vec_compare
= make_ssa_name (vec_cmp_type
);
10834 if (bitop2
== BIT_NOT_EXPR
)
10836 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
10839 = gimple_build_assign (vec_compare
, bitop2
,
10840 vec_cond_lhs
, new_temp
);
10841 vect_finish_stmt_generation (vinfo
, stmt_info
,
	  /* If we decided to apply a loop mask to the result of the vector
	     comparison, AND the comparison with the mask now.  Later passes
	     should then be able to reuse the AND results between multiple
	     vector statements.

	     For example:
	       for (int i = 0; i < 100; ++i)
		 x[i] = y[i] ? z[i] : 10;

	     results in following optimized GIMPLE:

	       mask__35.8_43 = vect__4.7_41 != { 0, ... };
	       vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	       _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	       vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	       vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
						 vect_iftmp.11_47, { 10, ... }>;

	     instead of using masked and unmasked forms of
	     vec != { 0, ... } (masked in the MASK_LOAD,
	     unmasked in the VEC_COND_EXPR).  */

	  /* Force vec_compare to be an SSA_NAME rather than a comparison,
	     in cases where that's necessary.  */
10872 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10874 if (!is_gimple_val (vec_compare
))
10876 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10877 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10879 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10880 vec_compare
= vec_compare_name
;
10883 if (must_invert_cmp_result
)
10885 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10886 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10889 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10890 vec_compare
= vec_compare_name
;
10896 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10898 tree tmp2
= make_ssa_name (vec_cmp_type
);
10900 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10902 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10903 vec_compare
= tmp2
;
10908 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10910 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10911 tree lhs
= gimple_get_lhs (old_stmt
);
10912 new_stmt
= gimple_build_call_internal
10913 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10915 gimple_call_set_lhs (new_stmt
, lhs
);
10916 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10917 if (old_stmt
== gsi_stmt (*gsi
))
10918 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
	      /* In this case we're moving the definition to later in the
		 block.  That doesn't matter because the only uses of the
		 lhs are in phi statements.  */
10924 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10925 gsi_remove (&old_gsi
, true);
10926 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10931 new_temp
= make_ssa_name (vec_dest
);
10932 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10933 vec_then_clause
, vec_else_clause
);
10934 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10937 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10939 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10943 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10945 vec_oprnds0
.release ();
10946 vec_oprnds1
.release ();
10947 vec_oprnds2
.release ();
10948 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
10962 vectorizable_comparison (vec_info
*vinfo
,
10963 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10965 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10967 tree lhs
, rhs1
, rhs2
;
10968 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10969 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10970 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10972 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10973 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10975 poly_uint64 nunits
;
10977 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10979 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10980 vec
<tree
> vec_oprnds0
= vNULL
;
10981 vec
<tree
> vec_oprnds1
= vNULL
;
10985 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10988 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10991 mask_type
= vectype
;
10992 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10997 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10999 gcc_assert (ncopies
>= 1);
11000 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
11003 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
11007 code
= gimple_assign_rhs_code (stmt
);
11009 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
11012 slp_tree slp_rhs1
, slp_rhs2
;
11013 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
11014 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
11017 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
11018 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
11021 if (vectype1
&& vectype2
11022 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
11023 TYPE_VECTOR_SUBPARTS (vectype2
)))
11026 vectype
= vectype1
? vectype1
: vectype2
;
11028 /* Invariant comparison. */
11031 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
11032 vectype
= mask_type
;
11034 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
11036 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
11039 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
11042 /* Can't compare mask and non-mask types. */
11043 if (vectype1
&& vectype2
11044 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
11054 bool swap_p
= false;
11055 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
11057 if (code
== GT_EXPR
)
11059 bitop1
= BIT_NOT_EXPR
;
11060 bitop2
= BIT_AND_EXPR
;
11062 else if (code
== GE_EXPR
)
11064 bitop1
= BIT_NOT_EXPR
;
11065 bitop2
= BIT_IOR_EXPR
;
11067 else if (code
== LT_EXPR
)
11069 bitop1
= BIT_NOT_EXPR
;
11070 bitop2
= BIT_AND_EXPR
;
11073 else if (code
== LE_EXPR
)
11075 bitop1
= BIT_NOT_EXPR
;
11076 bitop2
= BIT_IOR_EXPR
;
11081 bitop1
= BIT_XOR_EXPR
;
11082 if (code
== EQ_EXPR
)
11083 bitop2
= BIT_NOT_EXPR
;
11089 if (bitop1
== NOP_EXPR
)
11091 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
11096 machine_mode mode
= TYPE_MODE (vectype
);
11099 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
11100 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11103 if (bitop2
!= NOP_EXPR
)
11105 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
11106 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
11111 /* Put types on constant and invariant SLP children. */
11113 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
11114 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
11116 if (dump_enabled_p ())
11117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11118 "incompatible vector types for invariants\n");
11122 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
11123 vect_model_simple_cost (vinfo
, stmt_info
,
11124 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
11125 dts
, ndts
, slp_node
, cost_vec
);
11132 lhs
= gimple_assign_lhs (stmt
);
11133 mask
= vect_create_destination_var (lhs
, mask_type
);
11135 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
11136 rhs1
, &vec_oprnds0
, vectype
,
11137 rhs2
, &vec_oprnds1
, vectype
);
11139 std::swap (vec_oprnds0
, vec_oprnds1
);
11141 /* Arguments are ready. Create the new vector stmt. */
11142 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
11145 vec_rhs2
= vec_oprnds1
[i
];
11147 new_temp
= make_ssa_name (mask
);
11148 if (bitop1
== NOP_EXPR
)
11150 new_stmt
= gimple_build_assign (new_temp
, code
,
11151 vec_rhs1
, vec_rhs2
);
11152 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11156 if (bitop1
== BIT_NOT_EXPR
)
11157 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
11159 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
11161 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11162 if (bitop2
!= NOP_EXPR
)
11164 tree res
= make_ssa_name (mask
);
11165 if (bitop2
== BIT_NOT_EXPR
)
11166 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
11168 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
11170 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11174 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
11176 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11180 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11182 vec_oprnds0
.release ();
11183 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
11194 can_vectorize_live_stmts (vec_info
*vinfo
,
11195 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11196 slp_tree slp_node
, slp_instance slp_node_instance
,
11198 stmt_vector_for_cost
*cost_vec
)
11202 stmt_vec_info slp_stmt_info
;
11204 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
11206 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
11207 && !vectorizable_live_operation (vinfo
,
11208 slp_stmt_info
, gsi
, slp_node
,
11209 slp_node_instance
, i
,
11210 vec_stmt_p
, cost_vec
))
11214 else if (STMT_VINFO_LIVE_P (stmt_info
)
11215 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
11216 slp_node
, slp_node_instance
, -1,
11217 vec_stmt_p
, cost_vec
))
11223 /* Make sure the statement is vectorizable. */
11226 vect_analyze_stmt (vec_info
*vinfo
,
11227 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
11228 slp_tree node
, slp_instance node_instance
,
11229 stmt_vector_for_cost
*cost_vec
)
11231 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11232 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
11234 gimple_seq pattern_def_seq
;
11236 if (dump_enabled_p ())
11237 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
11240 if (gimple_has_volatile_ops (stmt_info
->stmt
))
11241 return opt_result::failure_at (stmt_info
->stmt
,
11243 " stmt has volatile operands: %G\n",
11246 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11248 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
11250 gimple_stmt_iterator si
;
11252 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
11254 stmt_vec_info pattern_def_stmt_info
11255 = vinfo
->lookup_stmt (gsi_stmt (si
));
11256 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
11257 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
11259 /* Analyze def stmt of STMT if it's a pattern stmt. */
11260 if (dump_enabled_p ())
11261 dump_printf_loc (MSG_NOTE
, vect_location
,
11262 "==> examining pattern def statement: %G",
11263 pattern_def_stmt_info
->stmt
);
11266 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
11267 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; in that case we don't analyze pattern stmts here, since the
     pattern stmts are already part of the SLP instance.  */
11289 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11290 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11291 && !STMT_VINFO_LIVE_P (stmt_info
))
11293 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11294 && pattern_stmt_info
11295 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11296 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11298 /* Analyze PATTERN_STMT instead of the original stmt. */
11299 stmt_info
= pattern_stmt_info
;
11300 if (dump_enabled_p ())
11301 dump_printf_loc (MSG_NOTE
, vect_location
,
11302 "==> examining pattern statement: %G",
11307 if (dump_enabled_p ())
11308 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11310 return opt_result::success ();
11313 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11315 && pattern_stmt_info
11316 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11317 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11319 /* Analyze PATTERN_STMT too. */
11320 if (dump_enabled_p ())
11321 dump_printf_loc (MSG_NOTE
, vect_location
,
11322 "==> examining pattern statement: %G",
11323 pattern_stmt_info
->stmt
);
11326 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11327 node_instance
, cost_vec
);
11332 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11334 case vect_internal_def
:
11337 case vect_reduction_def
:
11338 case vect_nested_cycle
:
11339 gcc_assert (!bb_vinfo
11340 && (relevance
== vect_used_in_outer
11341 || relevance
== vect_used_in_outer_by_reduction
11342 || relevance
== vect_used_by_reduction
11343 || relevance
== vect_unused_in_scope
11344 || relevance
== vect_used_only_live
));
11347 case vect_induction_def
:
11348 case vect_first_order_recurrence
:
11349 gcc_assert (!bb_vinfo
);
11352 case vect_constant_def
:
11353 case vect_external_def
:
11354 case vect_unknown_def_type
:
11356 gcc_unreachable ();
11359 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11361 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11363 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11365 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11366 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11367 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11368 *need_to_vectorize
= true;
11371 if (PURE_SLP_STMT (stmt_info
) && !node
)
11373 if (dump_enabled_p ())
11374 dump_printf_loc (MSG_NOTE
, vect_location
,
11375 "handled only by SLP analysis\n");
11376 return opt_result::success ();
11381 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11382 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11383 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11384 -mveclibabi= takes preference over library functions with
11385 the simd attribute. */
11386 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11387 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11389 || vectorizable_conversion (vinfo
, stmt_info
,
11390 NULL
, NULL
, node
, cost_vec
)
11391 || vectorizable_operation (vinfo
, stmt_info
,
11392 NULL
, NULL
, node
, cost_vec
)
11393 || vectorizable_assignment (vinfo
, stmt_info
,
11394 NULL
, NULL
, node
, cost_vec
)
11395 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11396 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11397 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11398 node
, node_instance
, cost_vec
)
11399 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11400 NULL
, node
, cost_vec
)
11401 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11402 || vectorizable_condition (vinfo
, stmt_info
,
11403 NULL
, NULL
, node
, cost_vec
)
11404 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11406 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11407 stmt_info
, NULL
, node
)
11408 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11409 stmt_info
, NULL
, node
, cost_vec
));
11413 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11414 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11415 NULL
, NULL
, node
, cost_vec
)
11416 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11418 || vectorizable_shift (vinfo
, stmt_info
,
11419 NULL
, NULL
, node
, cost_vec
)
11420 || vectorizable_operation (vinfo
, stmt_info
,
11421 NULL
, NULL
, node
, cost_vec
)
11422 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11424 || vectorizable_load (vinfo
, stmt_info
,
11425 NULL
, NULL
, node
, cost_vec
)
11426 || vectorizable_store (vinfo
, stmt_info
,
11427 NULL
, NULL
, node
, cost_vec
)
11428 || vectorizable_condition (vinfo
, stmt_info
,
11429 NULL
, NULL
, node
, cost_vec
)
11430 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11432 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11436 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11439 return opt_result::failure_at (stmt_info
->stmt
,
11441 " relevant stmt not supported: %G",
11444 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11445 need extra handling, except for vectorizable reductions. */
11447 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11448 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11449 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11450 stmt_info
, NULL
, node
, node_instance
,
11452 return opt_result::failure_at (stmt_info
->stmt
,
11454 " live stmt not supported: %G",
11457 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11466 vect_transform_stmt (vec_info
*vinfo
,
11467 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11468 slp_tree slp_node
, slp_instance slp_node_instance
)
11470 bool is_store
= false;
11471 gimple
*vec_stmt
= NULL
;
11474 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11476 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11478 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11480 switch (STMT_VINFO_TYPE (stmt_info
))
11482 case type_demotion_vec_info_type
:
11483 case type_promotion_vec_info_type
:
11484 case type_conversion_vec_info_type
:
11485 done
= vectorizable_conversion (vinfo
, stmt_info
,
11486 gsi
, &vec_stmt
, slp_node
, NULL
);
11490 case induc_vec_info_type
:
11491 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11492 stmt_info
, &vec_stmt
, slp_node
,
11497 case shift_vec_info_type
:
11498 done
= vectorizable_shift (vinfo
, stmt_info
,
11499 gsi
, &vec_stmt
, slp_node
, NULL
);
11503 case op_vec_info_type
:
11504 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11509 case assignment_vec_info_type
:
11510 done
= vectorizable_assignment (vinfo
, stmt_info
,
11511 gsi
, &vec_stmt
, slp_node
, NULL
);
11515 case load_vec_info_type
:
11516 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11521 case store_vec_info_type
:
11522 done
= vectorizable_store (vinfo
, stmt_info
,
11523 gsi
, &vec_stmt
, slp_node
, NULL
);
11525 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11527 /* In case of interleaving, the whole chain is vectorized when the
11528 last store in the chain is reached. Store stmts before the last
11529 one are skipped, and there vec_stmt_info shouldn't be freed
11531 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11532 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11539 case condition_vec_info_type
:
11540 done
= vectorizable_condition (vinfo
, stmt_info
,
11541 gsi
, &vec_stmt
, slp_node
, NULL
);
11545 case comparison_vec_info_type
:
11546 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11551 case call_vec_info_type
:
11552 done
= vectorizable_call (vinfo
, stmt_info
,
11553 gsi
, &vec_stmt
, slp_node
, NULL
);
11556 case call_simd_clone_vec_info_type
:
11557 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11561 case reduc_vec_info_type
:
11562 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11563 gsi
, &vec_stmt
, slp_node
);
11567 case cycle_phi_info_type
:
11568 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11569 &vec_stmt
, slp_node
, slp_node_instance
);
11573 case lc_phi_info_type
:
11574 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11575 stmt_info
, &vec_stmt
, slp_node
);
11579 case recurr_info_type
:
11580 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
11581 stmt_info
, &vec_stmt
, slp_node
, NULL
);
11585 case phi_info_type
:
11586 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11591 if (!STMT_VINFO_LIVE_P (stmt_info
))
11593 if (dump_enabled_p ())
11594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11595 "stmt not supported.\n");
11596 gcc_unreachable ();
11601 if (!slp_node
&& vec_stmt
)
11602 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11604 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11606 /* Handle stmts whose DEF is used outside the loop-nest that is
11607 being vectorized. */
11608 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11609 slp_node_instance
, true, NULL
);
11614 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11620 /* Remove a group of stores (for SLP or interleaving), free their
11624 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11626 stmt_vec_info next_stmt_info
= first_stmt_info
;
11628 while (next_stmt_info
)
11630 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11631 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11632 /* Free the attached stmt_vec_info and remove the stmt. */
11633 vinfo
->remove_stmt (next_stmt_info
);
11634 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a vector.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
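/* For example (illustrative only; the actual results depend on the target
   hooks), on a target whose preferred SIMD mode for SImode is V8SImode:

     get_related_vectype_for_scalar_type (VOIDmode, intSI_type_node, 0)
       -> vector(8) int
     get_related_vectype_for_scalar_type (V8SImode, short_integer_type_node, 0)
       -> vector(16) short int

   i.e. the second call picks the vector mode related to V8SImode that
   holds HImode elements and occupies the same number of bytes.  */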
tree
get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
				     tree scalar_type, poly_uint64 nunits)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* Interoperability between modes requires one to be a constant multiple
     of the other, so that the number of vectors required for each operation
     is a compile-time constant.  */
  if (prevailing_mode != VOIDmode
      && !constant_multiple_p (nunits * nbytes,
			       GET_MODE_SIZE (prevailing_mode))
      && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
			       nunits * nbytes))
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise lookup a vector mode based on the prevailing mode.  */
  if (prevailing_mode == VOIDmode)
    {
      gcc_assert (known_eq (nunits, 0U));
      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
      if (SCALAR_INT_MODE_P (simd_mode))
	{
	  /* Traditional behavior is not to take the integer mode
	     literally, but simply to use it as a way of determining
	     the vector size.  It is up to mode_for_vector to decide
	     what the TYPE_MODE should be.

	     Note that nunits == 1 is allowed in order to support single
	     element vector types.  */
	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
	    return NULL_TREE;
	}
    }
  else if (SCALAR_INT_MODE_P (prevailing_mode)
	   || !related_vector_mode (prevailing_mode,
				    inner_mode, nunits).exists (&simd_mode))
    {
      /* Fall back to using mode_for_vector, mostly in the hope of being
	 able to use an integer mode.  */
      if (known_eq (nunits, 0U)
	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
	return NULL_TREE;

      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
	return NULL_TREE;
    }

  vectype = build_vector_type_for_mode (scalar_type, simd_mode);

  /* In cases where the mode was chosen by mode_for_vector, check that
     the target actually supports the chosen mode, or that it at least
     allows the vector mode to be replaced by a like-sized integer.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
      (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
			     unsigned int group_size)
{
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
						      scalar_type);
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
  if (vectype)
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
  if (vectype
      && group_size
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
    {
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
      do
	{
	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
							 scalar_type, nunits);
	  nunits /= 2;
	}
      while (nunits > 1 && !vectype);
    }

  return vectype;
}
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
}
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_first_order_recurrence:
	  dump_printf (MSG_NOTE, "first order recurrence\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle
      || *dt == vect_first_order_recurrence)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	}
      else
	{
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
	  return true;
	}
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  if (gimple_assign_rhs_code (ass) == COND_EXPR
	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    {
	      if (operand < 2)
		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	      else
		*op = gimple_op (ass, operand);
	    }
	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	  else
	    *op = gimple_op (ass, operand + 1);
	}
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype)
      && SLP_TREE_DEF_TYPE (op) == vect_external_def)
    return false;
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else if (VECTOR_MODE_P (intermediate_mode))
	{
	  tree intermediate_element_type
	    = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
					      TYPE_UNSIGNED (prev_type));
	  intermediate_type
	    = build_vector_type_for_mode (intermediate_element_type,
					  intermediate_mode);
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && SCALAR_INT_MODE_P (prev_mode)
	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
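/* Editorial illustration, not part of the original GCC source: a loop
   mixing element widths.  For the store to `dst[i]' the statement vector
   type is a vector of int, but the smallest scalar type involved is the
   char load, so the "nunits" vector type is the char vector and it
   determines how many scalar iterations one vector iteration covers.

     void
     mixed_widths (int *__restrict dst, const char *__restrict src, int n)
     {
       for (int i = 0; i < n; ++i)
	 dst[i] = src[i];   // char elements decide the number of units
     }
*/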
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
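/* Editorial illustration, not part of the original GCC source: the
   arithmetic built above, as a plain C helper.  The initial min against
   START_INDEX guards against END_INDEX being smaller than START_INDEX, so
   the subtraction cannot wrap.

     static unsigned
     vector_length_for_iteration (unsigned start_index, unsigned end_index,
				  unsigned len_limit)
     {
       unsigned min_of_start_and_end
	 = start_index < end_index ? start_index : end_index;
       unsigned left_len = end_index - min_of_start_and_end;
       return left_len < len_limit ? left_len : len_limit;
     }
*/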