/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
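
/* Illustrative note (not part of the original sources): a typical caller
   accumulates costs along the lines of

     unsigned cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
				       stmt_info, 0, vect_body);

   The returned value is only a rough estimate from
   builtin_vectorization_cost; the definitive cost is computed later, when
   the saved stmt_info_for_cost entries are replayed through the target's
   cost model.  */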
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
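
/* Illustrative note (not part of the original sources): for a recognized
   pattern such as a widening multiplication, the original scalar stmt S is
   replaced by a pattern stmt S'; marking S as relevant above is redirected
   through STMT_VINFO_RELATED_STMT, so only S' ends up on the worklist and
   is later vectorized.  */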
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
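
/* Illustrative note (not part of the original sources): a stmt whose result
   is only consumed by a loop-closed PHI after the loop, e.g. the final value
   of "sum_1 = sum_0 + a[i]", is *live* (used out of the loop) but initially
   unused in scope; the code above upgrades it to vect_used_only_live unless
   it is a simple all-invariant computation.  */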
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
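
/* Illustrative note (not part of the original sources): in an outer-loop
   vectorization such as

     outer:  x_1 = ...            <-- def in the outer loop
     inner:  ... = x_1 + ...      <-- use in the inner loop

   the use falls into case 3a above, while a value defined in the inner loop
   and consumed by a reduction finalized in the outer loop falls into case 3b
   and is remapped to vect_used_in_outer_by_reduction.  */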
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
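
/* Illustrative note (not part of the original sources): for a grouped store
   of group size 4 implemented as VMAT_CONTIGUOUS_PERMUTE, the code above
   charges ncopies * ceil_log2 (4) * 4 = ncopies * 8 vec_perm operations on
   top of the vector stores themselves.  */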
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1117 vect_model_load_cost (vec_info
*vinfo
,
1118 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1119 vect_memory_access_type memory_access_type
,
1120 dr_alignment_support alignment_support_scheme
,
1122 gather_scatter_info
*gs_info
,
1124 stmt_vector_for_cost
*cost_vec
)
1126 unsigned int inside_cost
= 0, prologue_cost
= 0;
1127 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1129 gcc_assert (cost_vec
);
1131 /* ??? Somehow we need to fix this at the callers. */
1133 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1135 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms
, n_loads
;
1142 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1143 vf
, true, &n_perms
, &n_loads
);
1144 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1145 first_stmt_info
, 0, vect_body
);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info
= stmt_info
;
1155 if (!slp_node
&& grouped_access_p
)
1156 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p
&& !slp_node
&& memory_access_type
== VMAT_LOAD_STORE_LANES
)
1167 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
1168 stmt_vec_info next_stmt_info
= first_stmt_info
;
1172 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
1174 while (next_stmt_info
);
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE
, vect_location
,
1179 "vect_model_load_cost: %d unused vectors.\n",
1181 vect_get_load_cost (vinfo
, stmt_info
, ncopies
* gaps
,
1182 alignment_support_scheme
, misalignment
, false,
1183 &inside_cost
, &prologue_cost
,
1184 cost_vec
, cost_vec
, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1193 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1195 /* Uses an even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1198 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1199 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1200 stmt_info
, 0, vect_body
);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE
, vect_location
,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1208 /* The loads themselves. */
1209 if (memory_access_type
== VMAT_ELEMENTWISE
1210 || memory_access_type
== VMAT_GATHER_SCATTER
)
1212 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1213 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1214 if (memory_access_type
== VMAT_GATHER_SCATTER
1215 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1219 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1220 vec_to_scalar
, stmt_info
, 0,
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost
+= record_stmt_cost (cost_vec
,
1224 ncopies
* assumed_nunits
,
1225 scalar_load
, stmt_info
, 0, vect_body
);
1227 else if (memory_access_type
== VMAT_INVARIANT
)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1231 scalar_load
, stmt_info
, 0,
1233 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1234 scalar_to_vec
, stmt_info
, 0,
1238 vect_get_load_cost (vinfo
, stmt_info
, ncopies
,
1239 alignment_support_scheme
, misalignment
, first_stmt_p
,
1240 &inside_cost
, &prologue_cost
,
1241 cost_vec
, cost_vec
, true);
1242 if (memory_access_type
== VMAT_ELEMENTWISE
1243 || memory_access_type
== VMAT_STRIDED_SLP
1244 || (memory_access_type
== VMAT_GATHER_SCATTER
1245 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1246 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1247 stmt_info
, 0, vect_body
);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE
, vect_location
,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
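
/* Illustrative note (not part of the original sources): an emulated gather
   (gs_info->ifn == IFN_LAST and no builtin decl) is costed above as N offset
   element extracts, N scalar loads, and a vec_construct to reassemble the
   vector, where N is the number of vector elements times ncopies.  */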
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ???  Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
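
/* Illustrative note (not part of the original sources): for an invariant
   scalar operand 'x' and a V4SI destination type, the code above emits
   roughly

     cst_1 = {x, x, x, x};

   in the loop preheader (or before GSI when one is given) and returns the
   new SSA name.  */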
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}
void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
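
/* Illustrative note (not part of the original sources): on a target with
   masked loads/stores (can_vec_mask_load_store_p) the function above records
   an rgroup mask requirement, on a target with length-based loads/stores it
   records an rgroup length; when neither form is available the loop as a
   whole gives up on using partial vectors.  */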
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
		  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
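
/* Illustrative note (not part of the original sources): when both a loop
   mask and a statement-level mask are active, the helper above emits

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   and reuses VEC_MASK directly when there is no loop mask or when the pair
   is already known to be conjoined via vec_cond_masked_set.  */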
1869 /* Determine whether we can use a gather load or scatter store to vectorize
1870 strided load or store STMT_INFO by truncating the current offset to a
1871 smaller width. We need to be able to construct an offset vector:
1873 { 0, X, X*2, X*3, ... }
1875 without loss of precision, where X is STMT_INFO's DR_STEP.
1877 Return true if this is possible, describing the gather load or scatter
1878 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
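
/* Illustrative example (not from the original source): if the loop runs at
   most 256 scalar iterations and DR_STEP is 4 with scale 1, the largest
   offset needed is 255 * 4 == 1020, so after rounding the minimum precision
   up to a power of two a 16-bit offset type is tried.  */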
static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }
  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
                                                          sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
         no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
                                     vectype, memory_type, offset_type, scale,
                                     &gs_info->ifn, &gs_info->offset_vectype)
          || gs_info->ifn == IFN_LAST)
        continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;

      if (overflow && dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "truncating gather/scatter offset to %d bits"
                         " might change its value.\n", element_bits);

      return true;
    }

  return false;
}
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if the load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
              >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
                             indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
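
/* Illustrative note (not from the original source): for nunits == 4 the
   three elements pushed above are 3, 2, 1; as a single stepped pattern they
   extend to { 3, 2, 1, 0 }, i.e. the permutation that reverses the
   vector.  */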
2027 /* A subroutine of get_load_store_type, with a subset of the same
2028 arguments. Handle the case where STMT_INFO is a load or store that
2029 accesses consecutive elements with a negative step. Sets *POFFSET
2030 to the offset to be applied to the DR for the first access. */
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info
*vinfo
,
2034 stmt_vec_info stmt_info
, tree vectype
,
2035 vec_load_store_type vls_type
,
2036 unsigned int ncopies
, poly_int64
*poffset
)
2038 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2039 dr_alignment_support alignment_support_scheme
;
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE
;
  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
              * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
  alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }
2067 if (vls_type
== VLS_STORE_INVARIANT
)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE
, vect_location
,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN
;
2076 if (!perm_mask_for_reverse (vectype
))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2080 "negative step and reversing not supported.\n");
2082 return VMAT_ELEMENTWISE
;
2085 return VMAT_CONTIGUOUS_REVERSE
;
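
/* Illustrative example (not from the original source): for a V4SI access
   whose step runs backwards, *POFFSET above is (-4 + 1) * 4 == -12 bytes,
   so the vector access starts three elements before the DR address and the
   reverse permutation restores the original element order.  */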
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
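
/* Illustrative note (not from the original source): for a masked store
   such as IFN_MASK_STORE (ptr, align, mask, value) the stored-value index
   returned by internal_fn_stored_value_index is expected to be 3, so VALUE
   is what gets returned above.  */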
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and the result has the same vector size as VTYPE.  The function first
   checks whether the target supports a pieces-sized vector mode for the
   construction; if it does not, it then checks for a pieces-sized scalar
   mode.  It returns NULL_TREE if no suitable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
2124 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2126 gcc_assert (VECTOR_TYPE_P (vtype
));
2127 gcc_assert (known_gt (nelts
, 0U));
2129 machine_mode vmode
= TYPE_MODE (vtype
);
2130 if (!VECTOR_MODE_P (vmode
))
2133 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2134 unsigned int pbsize
;
2135 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2137 /* First check if vec_init optab supports construction from
2138 vector pieces directly. */
2139 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2140 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2142 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2143 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2144 != CODE_FOR_nothing
))
2146 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
      /* Otherwise check whether an integer type of the same piece size
         exists and whether the vec_init optab supports construction from
         it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
          && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, rmode, elmode)
              != CODE_FOR_nothing))
        {
          *ptype = build_nonstandard_integer_type (pbsize, 1);
          return build_vector_type (*ptype, nelts);
        }
2165 /* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT_INFO is part of a grouped load
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
2175 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2176 tree vectype
, slp_tree slp_node
,
2177 bool masked_p
, vec_load_store_type vls_type
,
2178 vect_memory_access_type
*memory_access_type
,
2179 poly_int64
*poffset
,
2180 dr_alignment_support
*alignment_support_scheme
,
2182 gather_scatter_info
*gs_info
)
2184 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2185 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2186 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2187 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2188 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2189 bool single_element_p
= (stmt_info
== first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2191 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2192 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p
= false;
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p
= (!masked_p
2201 && vls_type
== VLS_LOAD
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits
, group_size
))
2225 *memory_access_type
= VMAT_STRIDED_SLP
;
2227 *memory_access_type
= VMAT_ELEMENTWISE
;
2231 overrun_p
= loop_vinfo
&& gap
!= 0;
2232 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2244 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2246 / vect_get_scalar_dr_size (first_dr_info
)))
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations avoid the epilogue peeling
2251 by simply loading half of the vector only. Usually
2252 the construction with an upper zero half will be elided. */
2253 dr_alignment_support alss
;
2254 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2258 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2259 vectype
, misalign
)))
2261 || alss
== dr_unaligned_supported
)
2262 && known_eq (nunits
, (group_size
- gap
) * 2)
2263 && known_eq (nunits
, group_size
)
2264 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2268 if (overrun_p
&& !can_overrun_p
)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2272 "Peeling for outer loop is not supported\n");
2275 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2278 if (single_element_p
)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type
= get_negative_load_store_type
2282 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits
, group_size
))
2289 *memory_access_type
= VMAT_STRIDED_SLP
;
2291 *memory_access_type
= VMAT_ELEMENTWISE
;
2296 gcc_assert (!loop_vinfo
|| cmp
> 0);
2297 *memory_access_type
= VMAT_CONTIGUOUS
;
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2306 && *memory_access_type
== VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2308 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2311 unsigned HOST_WIDE_INT cnunits
, cvf
;
2313 || !nunits
.is_constant (&cnunits
)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size
* cvf
) % cnunits
+ group_size
< cnunits
)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2324 "peeling for gaps insufficient for "
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type
= VMAT_ELEMENTWISE
;
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p
= (gap
!= 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2347 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2348 / vect_get_scalar_dr_size (first_dr_info
)))
2349 would_overrun_p
= false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2352 && (can_overrun_p
|| !would_overrun_p
)
2353 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2355 /* First cope with the degenerate case of a single-element
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type
== VLS_LOAD
2362 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2363 : vect_store_lanes_supported (vectype
, group_size
,
2366 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2367 overrun_p
= would_overrun_p
;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type
== VLS_LOAD
2372 ? vect_grouped_load_supported (vectype
, single_element_p
,
2374 : vect_grouped_store_supported (vectype
, group_size
))
2376 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2377 overrun_p
= would_overrun_p
;
  /* As a last resort, try using a gather load or scatter store.

     ??? Although the code can handle all group sizes correctly,
     it probably isn't a win to use separate strided accesses based
     on nearby locations.  Or, even if it's a win over scalar code,
     it might not be a win over vectorizing at a lower VF, if that
     allows us to use contiguous accesses.  */
2388 if (*memory_access_type
== VMAT_ELEMENTWISE
2391 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2393 *memory_access_type
= VMAT_GATHER_SCATTER
;
2396 if (*memory_access_type
== VMAT_GATHER_SCATTER
2397 || *memory_access_type
== VMAT_ELEMENTWISE
)
2399 *alignment_support_scheme
= dr_unaligned_supported
;
2400 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2404 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2410 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2415 while (next_stmt_info
)
2417 tree op
= vect_get_store_rhs (next_stmt_info
);
2418 enum vect_def_type dt
;
2419 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2423 "use not simple.\n");
2426 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2432 gcc_assert (can_overrun_p
);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2435 "Data access with gaps requires scalar "
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2443 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2444 if there is a memory access type that the vectorized form can use,
2445 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2446 or scatters, fill in GS_INFO accordingly. In addition
2447 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2448 the target does not support the alignment scheme. *MISALIGNMENT
2449 is set according to the alignment of the access (including
2450 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2452 SLP says whether we're performing SLP rather than loop vectorization.
2453 MASKED_P is true if the statement is conditional on a vectorized mask.
2454 VECTYPE is the vector type that the vectorized statements will use.
2455 NCOPIES is the number of vector statements that will be needed. */
2458 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2459 tree vectype
, slp_tree slp_node
,
2460 bool masked_p
, vec_load_store_type vls_type
,
2461 unsigned int ncopies
,
2462 vect_memory_access_type
*memory_access_type
,
2463 poly_int64
*poffset
,
2464 dr_alignment_support
*alignment_support_scheme
,
2466 gather_scatter_info
*gs_info
)
2468 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2469 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2470 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2474 *memory_access_type
= VMAT_GATHER_SCATTER
;
2475 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2477 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2478 &gs_info
->offset_dt
,
2479 &gs_info
->offset_vectype
))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2483 "%s index use not simple.\n",
2484 vls_type
== VLS_LOAD
? "gather" : "scatter");
2487 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2489 if (vls_type
!= VLS_LOAD
)
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2493 "unsupported emulated scatter.\n");
2496 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2497 || !TYPE_VECTOR_SUBPARTS
2498 (gs_info
->offset_vectype
).is_constant ()
2499 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2500 (gs_info
->offset_vectype
),
2501 TYPE_VECTOR_SUBPARTS (vectype
)))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2505 "unsupported vector types for emulated "
2510 /* Gather-scatter accesses perform only component accesses, alignment
2511 is irrelevant for them. */
2512 *alignment_support_scheme
= dr_unaligned_supported
;
2514 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2516 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2518 vls_type
, memory_access_type
, poffset
,
2519 alignment_support_scheme
,
2520 misalignment
, gs_info
))
2523 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2525 gcc_assert (!slp_node
);
2527 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2529 *memory_access_type
= VMAT_GATHER_SCATTER
;
2531 *memory_access_type
= VMAT_ELEMENTWISE
;
2532 /* Alignment is irrelevant here. */
2533 *alignment_support_scheme
= dr_unaligned_supported
;
2537 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2540 gcc_assert (vls_type
== VLS_LOAD
);
2541 *memory_access_type
= VMAT_INVARIANT
;
2542 /* Invariant accesses perform only component accesses, alignment
2543 is irrelevant for them. */
2544 *alignment_support_scheme
= dr_unaligned_supported
;
2549 *memory_access_type
= get_negative_load_store_type
2550 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2552 *memory_access_type
= VMAT_CONTIGUOUS
;
2553 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2555 *alignment_support_scheme
2556 = vect_supportable_dr_alignment (vinfo
,
2557 STMT_VINFO_DR_INFO (stmt_info
),
2558 vectype
, *misalignment
);
2562 if ((*memory_access_type
== VMAT_ELEMENTWISE
2563 || *memory_access_type
== VMAT_STRIDED_SLP
)
2564 && !nunits
.is_constant ())
2566 if (dump_enabled_p ())
2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2568 "Not using elementwise accesses due to variable "
2569 "vectorization factor.\n");
2573 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2577 "unsupported unaligned access\n");
2581 /* FIXME: At the moment the cost model seems to underestimate the
2582 cost of using elementwise accesses. This check preserves the
2583 traditional behavior until that can be fixed. */
2584 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2585 if (!first_stmt_info
)
2586 first_stmt_info
= stmt_info
;
2587 if (*memory_access_type
== VMAT_ELEMENTWISE
2588 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2589 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2590 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2591 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2595 "not falling back to elementwise accesses\n");
2601 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2602 conditional operation STMT_INFO. When returning true, store the mask
2603 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2604 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2605 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2608 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2609 slp_tree slp_node
, unsigned mask_index
,
2610 tree
*mask
, slp_tree
*mask_node
,
2611 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2613 enum vect_def_type mask_dt
;
2615 slp_tree mask_node_1
;
2616 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2617 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2621 "mask use not simple.\n");
2625 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2627 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2629 "mask argument is not a boolean.\n");
2633 /* If the caller is not prepared for adjusting an external/constant
2634 SLP mask vector type fail. */
2637 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2641 "SLP mask argument is not vectorized.\n");
2645 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2647 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2649 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2653 "could not find an appropriate vector mask type.\n");
2657 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2658 TYPE_VECTOR_SUBPARTS (vectype
)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2662 "vector mask type %T"
2663 " does not match vector data type %T.\n",
2664 mask_vectype
, vectype
);
2669 *mask_dt_out
= mask_dt
;
2670 *mask_vectype_out
= mask_vectype
;
2672 *mask_node
= mask_node_1
;
2676 /* Return true if stored value RHS is suitable for vectorizing store
2677 statement STMT_INFO. When returning true, store the type of the
2678 definition in *RHS_DT_OUT, the type of the vectorized store value in
2679 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2682 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2683 slp_tree slp_node
, tree rhs
,
2684 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2685 vec_load_store_type
*vls_type_out
)
  /* If this is a store from a constant, make sure native_encode_expr
     can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2693 "cannot encode constant as a byte sequence.\n");
2698 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2700 if (gimple_call_internal_p (call
)
2701 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2702 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2705 enum vect_def_type rhs_dt
;
2708 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2709 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2713 "use not simple.\n");
2717 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2718 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2722 "incompatible vector types.\n");
2726 *rhs_dt_out
= rhs_dt
;
2727 *rhs_vectype_out
= rhs_vectype
;
2728 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2729 *vls_type_out
= VLS_STORE_INVARIANT
;
2731 *vls_type_out
= VLS_STORE
;
2735 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2736 Note that we support masks with floating-point type, in which case the
2737 floats are interpreted as a bitmask. */
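
/* Illustrative note (not from the original source): for a floating-point
   mask element type the all-ones value below is built from an all-ones bit
   pattern via real_from_target, so each lane is "all bits set" rather than
   the floating-point value 1.0.  */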
2740 vect_build_all_ones_mask (vec_info
*vinfo
,
2741 stmt_vec_info stmt_info
, tree masktype
)
2743 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2744 return build_int_cst (masktype
, -1);
2745 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2747 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2748 mask
= build_vector_from_val (masktype
, mask
);
2749 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2755 for (int j
= 0; j
< 6; ++j
)
2757 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2758 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2759 mask
= build_vector_from_val (masktype
, mask
);
2760 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2765 /* Build an all-zero merge value of type VECTYPE while vectorizing
2766 STMT_INFO as a gather load. */
2769 vect_build_zero_merge_argument (vec_info
*vinfo
,
2770 stmt_vec_info stmt_info
, tree vectype
)
2773 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2774 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2775 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2779 for (int j
= 0; j
< 6; ++j
)
2781 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2782 merge
= build_real (TREE_TYPE (vectype
), r
);
2786 merge
= build_vector_from_val (vectype
, merge
);
2787 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2790 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2791 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2792 the gather load operation. If the load is conditional, MASK is the
2793 unvectorized condition and MASK_DT is its definition type, otherwise
2797 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2798 gimple_stmt_iterator
*gsi
,
2800 gather_scatter_info
*gs_info
,
2803 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2804 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2805 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2806 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2807 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2808 edge pe
= loop_preheader_edge (loop
);
2809 enum { NARROW
, NONE
, WIDEN
} modifier
;
2810 poly_uint64 gather_off_nunits
2811 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2813 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2814 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2815 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2816 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2817 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2818 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2819 tree scaletype
= TREE_VALUE (arglist
);
2820 tree real_masktype
= masktype
;
2821 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2823 || TREE_CODE (masktype
) == INTEGER_TYPE
2824 || types_compatible_p (srctype
, masktype
)));
2826 masktype
= truth_type_for (srctype
);
2828 tree mask_halftype
= masktype
;
2829 tree perm_mask
= NULL_TREE
;
2830 tree mask_perm_mask
= NULL_TREE
;
2831 if (known_eq (nunits
, gather_off_nunits
))
2833 else if (known_eq (nunits
* 2, gather_off_nunits
))
2837 /* Currently widening gathers and scatters are only supported for
2838 fixed-length vectors. */
2839 int count
= gather_off_nunits
.to_constant ();
2840 vec_perm_builder
sel (count
, count
, 1);
2841 for (int i
= 0; i
< count
; ++i
)
2842 sel
.quick_push (i
| (count
/ 2));
2844 vec_perm_indices
indices (sel
, 1, count
);
2845 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2848 else if (known_eq (nunits
, gather_off_nunits
* 2))
2852 /* Currently narrowing gathers and scatters are only supported for
2853 fixed-length vectors. */
2854 int count
= nunits
.to_constant ();
2855 vec_perm_builder
sel (count
, count
, 1);
2856 sel
.quick_grow (count
);
2857 for (int i
= 0; i
< count
; ++i
)
2858 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2859 vec_perm_indices
indices (sel
, 2, count
);
2860 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2864 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2866 for (int i
= 0; i
< count
; ++i
)
2867 sel
[i
] = i
| (count
/ 2);
2868 indices
.new_vector (sel
, 2, count
);
2869 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2872 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2877 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2878 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2880 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2881 if (!is_gimple_min_invariant (ptr
))
2884 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2885 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2886 gcc_assert (!new_bb
);
2889 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2891 tree vec_oprnd0
= NULL_TREE
;
2892 tree vec_mask
= NULL_TREE
;
2893 tree src_op
= NULL_TREE
;
2894 tree mask_op
= NULL_TREE
;
2895 tree prev_res
= NULL_TREE
;
2899 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2900 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2903 auto_vec
<tree
> vec_oprnds0
;
2904 auto_vec
<tree
> vec_masks
;
2905 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2906 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2907 gs_info
->offset
, &vec_oprnds0
);
2909 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2910 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2911 mask
, &vec_masks
, masktype
);
2912 for (int j
= 0; j
< ncopies
; ++j
)
2915 if (modifier
== WIDEN
&& (j
& 1))
2916 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2917 perm_mask
, stmt_info
, gsi
);
2919 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2921 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2924 TYPE_VECTOR_SUBPARTS (idxtype
)));
2925 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2926 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2927 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2928 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2934 if (mask_perm_mask
&& (j
& 1))
2935 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2936 mask_perm_mask
, stmt_info
, gsi
);
2939 if (modifier
== NARROW
)
2942 vec_mask
= vec_masks
[j
/ 2];
2945 vec_mask
= vec_masks
[j
];
2948 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2950 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2951 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2952 gcc_assert (known_eq (sub1
, sub2
));
2953 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2954 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2956 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2957 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2961 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2963 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2965 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2966 : VEC_UNPACK_LO_EXPR
,
2968 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2974 tree mask_arg
= mask_op
;
2975 if (masktype
!= real_masktype
)
2977 tree utype
, optype
= TREE_TYPE (mask_op
);
2978 if (VECTOR_TYPE_P (real_masktype
)
2979 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2980 utype
= real_masktype
;
2982 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2983 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2984 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2986 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2987 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2989 if (!useless_type_conversion_p (real_masktype
, utype
))
2991 gcc_assert (TYPE_PRECISION (utype
)
2992 <= TYPE_PRECISION (real_masktype
));
2993 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2994 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2995 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2998 src_op
= build_zero_cst (srctype
);
3000 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
3003 if (!useless_type_conversion_p (vectype
, rettype
))
3005 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
3006 TYPE_VECTOR_SUBPARTS (rettype
)));
3007 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
3008 gimple_call_set_lhs (new_stmt
, op
);
3009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3010 var
= make_ssa_name (vec_dest
);
3011 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
3012 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
3013 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3017 var
= make_ssa_name (vec_dest
, new_stmt
);
3018 gimple_call_set_lhs (new_stmt
, var
);
3019 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3022 if (modifier
== NARROW
)
3029 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
3031 new_stmt
= SSA_NAME_DEF_STMT (var
);
3034 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3036 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3039 /* Prepare the base and offset in GS_INFO for vectorization.
3040 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3041 to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
3046 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
3047 class loop
*loop
, stmt_vec_info stmt_info
,
3048 slp_tree slp_node
, gather_scatter_info
*gs_info
,
3049 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
3051 gimple_seq stmts
= NULL
;
3052 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
3056 edge pe
= loop_preheader_edge (loop
);
3057 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3058 gcc_assert (!new_bb
);
3061 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
3065 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
3066 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
3067 gs_info
->offset
, vec_offset
,
3068 gs_info
->offset_vectype
);
3072 /* Prepare to implement a grouped or strided load or store using
3073 the gather load or scatter store operation described by GS_INFO.
3074 STMT_INFO is the load or store statement.
3076 Set *DATAREF_BUMP to the amount that should be added to the base
3077 address after each copy of the vectorized statement. Set *VEC_OFFSET
3078 to an invariant offset vector in which element I has the value
3079 I * DR_STEP / SCALE. */
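
/* Illustrative example (not from the original source): with DR_STEP == 32
   bytes, SCALE == 4 and V4SI vectors, X is 8, *VEC_OFFSET becomes
   { 0, 8, 16, 24 } and *DATAREF_BUMP is 4 * 32 == 128 bytes per copy.  */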
3082 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3083 loop_vec_info loop_vinfo
,
3084 gather_scatter_info
*gs_info
,
3085 tree
*dataref_bump
, tree
*vec_offset
)
3087 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3088 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3090 tree bump
= size_binop (MULT_EXPR
,
3091 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3092 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3093 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3095 /* The offset given in GS_INFO can have pointer type, so use the element
3096 type of the vector instead. */
3097 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3099 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3100 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3101 ssize_int (gs_info
->scale
));
3102 step
= fold_convert (offset_type
, step
);
3104 /* Create {0, X, X*2, X*3, ...}. */
3105 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3106 build_zero_cst (offset_type
), step
);
3107 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
3110 /* Return the amount that should be added to a vector pointer to move
3111 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3112 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3116 vect_get_data_ptr_increment (vec_info
*vinfo
,
3117 dr_vec_info
*dr_info
, tree aggr_type
,
3118 vect_memory_access_type memory_access_type
)
3120 if (memory_access_type
== VMAT_INVARIANT
)
3121 return size_zero_node
;
3123 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3124 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3125 if (tree_int_cst_sgn (step
) == -1)
3126 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3130 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3133 vectorizable_bswap (vec_info
*vinfo
,
3134 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3135 gimple
**vec_stmt
, slp_tree slp_node
,
3137 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3140 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3141 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3144 op
= gimple_call_arg (stmt
, 0);
3145 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3146 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3148 /* Multiple types in SLP are handled by creating the appropriate number of
3149 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3154 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3156 gcc_assert (ncopies
>= 1);
3158 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3162 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3163 unsigned word_bytes
;
3164 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3167 /* The encoding uses one stepped pattern for each byte in the word. */
3168 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3169 for (unsigned i
= 0; i
< 3; ++i
)
3170 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3171 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
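
  /* Illustrative note (not part of the original source): for a 32-bit bswap
     on V16QI (word_bytes == 4) the pushes above produce 3, 2, 1, 0 for the
     first word, 7, 6, 5, 4 for the second, and so on, i.e. each byte is
     taken from the mirrored position within its word.  */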
3173 vec_perm_indices
indices (elts
, 1, num_bytes
);
3174 machine_mode vmode
= TYPE_MODE (char_vectype
);
3175 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3181 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3185 "incompatible vector types for invariants\n");
3189 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3190 DUMP_VECT_SCOPE ("vectorizable_bswap");
3191 record_stmt_cost (cost_vec
,
3192 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3193 record_stmt_cost (cost_vec
,
3195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3196 vec_perm
, stmt_info
, 0, vect_body
);
3200 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3203 vec
<tree
> vec_oprnds
= vNULL
;
3204 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3206 /* Arguments are ready. create the new vector stmt. */
3209 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3212 tree tem
= make_ssa_name (char_vectype
);
3213 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3214 char_vectype
, vop
));
3215 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3216 tree tem2
= make_ssa_name (char_vectype
);
3217 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3218 tem
, tem
, bswap_vconst
);
3219 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3220 tem
= make_ssa_name (vectype
);
3221 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3223 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3225 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3227 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3231 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3233 vec_oprnds
.release ();
3237 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3238 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
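
/* Illustrative example (not from the original source): packing two V4SI
   results into a single V8HI result in one step would typically use
   VEC_PACK_TRUNC_EXPR as the convert code, provided the target supports
   that operation.  */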
3243 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3244 tree_code
*convert_code
)
3246 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3247 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3251 int multi_step_cvt
= 0;
3252 auto_vec
<tree
, 8> interm_types
;
3253 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3254 &code
, &multi_step_cvt
, &interm_types
)
3258 *convert_code
= code
;
3262 /* Function vectorizable_call.
3264 Check if STMT_INFO performs a function call that can be vectorized.
3265 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3266 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3267 Return true if STMT_INFO is vectorizable in this way. */
3270 vectorizable_call (vec_info
*vinfo
,
3271 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3272 gimple
**vec_stmt
, slp_tree slp_node
,
3273 stmt_vector_for_cost
*cost_vec
)
3279 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3280 tree vectype_out
, vectype_in
;
3281 poly_uint64 nunits_in
;
3282 poly_uint64 nunits_out
;
3283 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3284 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3285 tree fndecl
, new_temp
, rhs_type
;
3286 enum vect_def_type dt
[4]
3287 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3288 vect_unknown_def_type
};
3289 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3290 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3291 int ndts
= ARRAY_SIZE (dt
);
3293 auto_vec
<tree
, 8> vargs
;
3294 enum { NARROW
, NONE
, WIDEN
} modifier
;
3298 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3301 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3305 /* Is STMT_INFO a vectorizable call? */
3306 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3310 if (gimple_call_internal_p (stmt
)
3311 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3312 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3313 /* Handled by vectorizable_load and vectorizable_store. */
3316 if (gimple_call_lhs (stmt
) == NULL_TREE
3317 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3320 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3322 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3324 /* Process function arguments. */
3325 rhs_type
= NULL_TREE
;
3326 vectype_in
= NULL_TREE
;
3327 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is also not good.  */
  if (nargs == 0 || nargs > 4)
    return false;

  /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic.  */
  combined_fn cfn = gimple_call_combined_fn (stmt);
3337 if (cfn
== CFN_GOMP_SIMD_LANE
)
3340 rhs_type
= unsigned_type_node
;
3344 if (internal_fn_p (cfn
))
3345 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3347 for (i
= 0; i
< nargs
; i
++)
3349 if ((int) i
== mask_opno
)
3351 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3352 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3357 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3358 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3362 "use not simple.\n");
3366 /* We can only handle calls with arguments of the same type. */
3368 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3372 "argument types differ.\n");
3376 rhs_type
= TREE_TYPE (op
);
3379 vectype_in
= vectypes
[i
];
3380 else if (vectypes
[i
]
3381 && !types_compatible_p (vectypes
[i
], vectype_in
))
3383 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3385 "argument vector types differ.\n");
3389 /* If all arguments are external or constant defs, infer the vector type
3390 from the scalar type. */
3392 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3394 gcc_assert (vectype_in
);
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3399 "no vectype for scalar type %T\n", rhs_type
);
3403 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3404 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3405 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3406 by a pack of the two vectors into an SI vector. We would need
3407 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3408 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3412 "mismatched vector sizes %T and %T\n",
3413 vectype_in
, vectype_out
);
3417 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3418 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3422 "mixed mask and nonmask vector types\n");
3426 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3430 "use emulated vector type for call\n");
3435 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3436 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3437 if (known_eq (nunits_in
* 2, nunits_out
))
3439 else if (known_eq (nunits_out
, nunits_in
))
3441 else if (known_eq (nunits_out
* 2, nunits_in
))
3446 /* We only handle functions that do not read or clobber memory. */
3447 if (gimple_vuse (stmt
))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3451 "function reads from or writes to memory.\n");
3455 /* For now, we only vectorize functions if a target specific builtin
3456 is available. TODO -- in some cases, it might be profitable to
3457 insert the calls for pieces of the vector, in order to be able
3458 to vectorize other operations in the loop. */
3460 internal_fn ifn
= IFN_LAST
;
3461 tree callee
= gimple_call_fndecl (stmt
);
3463 /* First try using an internal function. */
3464 tree_code convert_code
= ERROR_MARK
;
3466 && (modifier
== NONE
3467 || (modifier
== NARROW
3468 && simple_integer_narrowing (vectype_out
, vectype_in
,
3470 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3473 /* If that fails, try asking for a target-specific built-in function. */
3474 if (ifn
== IFN_LAST
)
3476 if (cfn
!= CFN_LAST
)
3477 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3478 (cfn
, vectype_out
, vectype_in
);
3479 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3480 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3481 (callee
, vectype_out
, vectype_in
);
3484 if (ifn
== IFN_LAST
&& !fndecl
)
3486 if (cfn
== CFN_GOMP_SIMD_LANE
3489 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3490 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3491 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3492 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3494 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3495 { 0, 1, 2, ... vf - 1 } vector. */
3496 gcc_assert (nargs
== 0);
3498 else if (modifier
== NONE
3499 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3500 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3501 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3502 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3503 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3504 slp_op
, vectype_in
, cost_vec
);
3507 if (dump_enabled_p ())
3508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3509 "function is not vectorizable.\n");
3516 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3517 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3519 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies
>= 1);
3525 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3526 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3527 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3528 if (!vec_stmt
) /* transformation not required. */
3531 for (i
= 0; i
< nargs
; ++i
)
3532 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3534 ? vectypes
[i
] : vectype_in
))
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3538 "incompatible vector types for invariants\n");
3541 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3542 DUMP_VECT_SCOPE ("vectorizable_call");
3543 vect_model_simple_cost (vinfo
, stmt_info
,
3544 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3545 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3546 record_stmt_cost (cost_vec
, ncopies
/ 2,
3547 vec_promote_demote
, stmt_info
, 0, vect_body
);
3550 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3551 && (reduc_idx
>= 0 || mask_opno
>= 0))
3554 && (cond_fn
== IFN_LAST
3555 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3556 OPTIMIZE_FOR_SPEED
)))
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3560 "can't use a fully-masked loop because no"
3561 " conditional operation is available.\n");
3562 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3566 unsigned int nvectors
3568 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3570 tree scalar_mask
= NULL_TREE
;
3572 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3573 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3574 vectype_out
, scalar_mask
);
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3586 scalar_dest
= gimple_call_lhs (stmt
);
3587 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3589 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3590 unsigned int vect_nargs
= nargs
;
3591 if (masked_loop_p
&& reduc_idx
>= 0)
3597 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3599 tree prev_res
= NULL_TREE
;
3600 vargs
.safe_grow (vect_nargs
, true);
3601 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3602 for (j
= 0; j
< ncopies
; ++j
)
3604 /* Build argument list for the vectorized call. */
3607 vec
<tree
> vec_oprnds0
;
3609 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3610 vec_oprnds0
= vec_defs
[0];
3612 /* Arguments are ready. Create the new vector stmt. */
3613 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3616 if (masked_loop_p
&& reduc_idx
>= 0)
3618 unsigned int vec_num
= vec_oprnds0
.length ();
3619 /* Always true for SLP. */
3620 gcc_assert (ncopies
== 1);
3621 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3625 for (k
= 0; k
< nargs
; k
++)
3627 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3628 vargs
[varg
++] = vec_oprndsk
[i
];
3630 if (masked_loop_p
&& reduc_idx
>= 0)
3631 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3633 if (modifier
== NARROW
)
              /* We don't define any narrowing conditional functions
                 at present.  */
              gcc_assert (mask_opno < 0);
3638 tree half_res
= make_ssa_name (vectype_in
);
3640 = gimple_build_call_internal_vec (ifn
, vargs
);
3641 gimple_call_set_lhs (call
, half_res
);
3642 gimple_call_set_nothrow (call
, true);
3643 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3646 prev_res
= half_res
;
3649 new_temp
= make_ssa_name (vec_dest
);
3650 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3651 prev_res
, half_res
);
3652 vect_finish_stmt_generation (vinfo
, stmt_info
,
3657 if (mask_opno
>= 0 && masked_loop_p
)
3659 unsigned int vec_num
= vec_oprnds0
.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies
== 1);
3662 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3664 vargs
[mask_opno
] = prepare_vec_mask
3665 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3666 vargs
[mask_opno
], gsi
);
3670 if (ifn
!= IFN_LAST
)
3671 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3673 call
= gimple_build_call_vec (fndecl
, vargs
);
3674 new_temp
= make_ssa_name (vec_dest
, call
);
3675 gimple_call_set_lhs (call
, new_temp
);
3676 gimple_call_set_nothrow (call
, true);
3677 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3680 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3686 if (masked_loop_p
&& reduc_idx
>= 0)
3687 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3689 for (i
= 0; i
< nargs
; i
++)
3691 op
= gimple_call_arg (stmt
, i
);
3694 vec_defs
.quick_push (vNULL
);
3695 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3699 vargs
[varg
++] = vec_defs
[i
][j
];
3701 if (masked_loop_p
&& reduc_idx
>= 0)
3702 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3704 if (mask_opno
>= 0 && masked_loop_p
)
3706 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3709 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3710 vargs
[mask_opno
], gsi
);
3714 if (cfn
== CFN_GOMP_SIMD_LANE
)
3716 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3718 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3719 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3720 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3721 new_temp
= make_ssa_name (vec_dest
);
3722 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3723 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3725 else if (modifier
== NARROW
)
3727 /* We don't define any narrowing conditional functions at
3729 gcc_assert (mask_opno
< 0);
3730 tree half_res
= make_ssa_name (vectype_in
);
3731 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3732 gimple_call_set_lhs (call
, half_res
);
3733 gimple_call_set_nothrow (call
, true);
3734 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3737 prev_res
= half_res
;
3740 new_temp
= make_ssa_name (vec_dest
);
3741 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3742 prev_res
, half_res
);
3743 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3748 if (ifn
!= IFN_LAST
)
3749 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3751 call
= gimple_build_call_vec (fndecl
, vargs
);
3752 new_temp
= make_ssa_name (vec_dest
, call
);
3753 gimple_call_set_lhs (call
, new_temp
);
3754 gimple_call_set_nothrow (call
, true);
3755 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3759 if (j
== (modifier
== NARROW
? 1 : 0))
3760 *vec_stmt
= new_stmt
;
3761 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3763 for (i
= 0; i
< nargs
; i
++)
3765 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3766 vec_oprndsi
.release ();
3769 else if (modifier
== NARROW
)
3771 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3772 /* We don't define any narrowing conditional functions at present. */
3773 gcc_assert (mask_opno
< 0);
3774 for (j
= 0; j
< ncopies
; ++j
)
3776 /* Build argument list for the vectorized call. */
3778 vargs
.create (nargs
* 2);
3784 vec
<tree
> vec_oprnds0
;
3786 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3787 vec_oprnds0
= vec_defs
[0];
3789 /* Arguments are ready. Create the new vector stmt. */
3790 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3794 for (k
= 0; k
< nargs
; k
++)
3796 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3797 vargs
.quick_push (vec_oprndsk
[i
]);
3798 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3801 if (ifn
!= IFN_LAST
)
3802 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3804 call
= gimple_build_call_vec (fndecl
, vargs
);
3805 new_temp
= make_ssa_name (vec_dest
, call
);
3806 gimple_call_set_lhs (call
, new_temp
);
3807 gimple_call_set_nothrow (call
, true);
3808 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3809 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3814 for (i
= 0; i
< nargs
; i
++)
3816 op
= gimple_call_arg (stmt
, i
);
3819 vec_defs
.quick_push (vNULL
);
3820 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3821 op
, &vec_defs
[i
], vectypes
[i
]);
3823 vec_oprnd0
= vec_defs
[i
][2*j
];
3824 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3826 vargs
.quick_push (vec_oprnd0
);
3827 vargs
.quick_push (vec_oprnd1
);
3830 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3831 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3832 gimple_call_set_lhs (new_stmt
, new_temp
);
3833 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3835 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3839 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3841 for (i
= 0; i
< nargs
; i
++)
3843 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3844 vec_oprndsi
.release ();
	}
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
    }
}
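/* Illustrative sketch (an assumption-based note, not part of the original
   sources): OpenMP lowering gives each simd-lane-private variable a slot
   in a per-loop array and rewrites references into the shape

     _1 = GOMP_SIMD_LANE (simduid);
     _2 = _1 * 4;
     addr = &privatized_array + _2;

   Such an address is not a loop induction variable, but it does advance by
   a fixed step from one simd lane to the next.  The walk above recognizes
   exactly this POINTER_PLUS_EXPR / MULT_EXPR / GOMP_SIMD_LANE chain and
   records the invariant base and the per-lane step in ARGINFO, so the
   clone's linear-argument convention can be used for it.  */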
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   number of elements.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
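/* For example, a V8SI vector type (eight 32-bit lanes) yields 8 here.  The
   .to_constant () call asserts that the lane count is not a runtime
   poly_int, which holds for the fixed-length SIMD clones handled in this
   file (variable-width vectors are rejected earlier).  */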
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
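/* Illustrative sketch (not taken from this file): given

     #pragma omp declare simd simdlen(4) notinbranch
     int f (int x);

     for (i = 0; i < n; i++)
       a[i] = f (b[i]);

   the front end emits SIMD clones of f, e.g. one taking and returning a
   four-lane integer vector.  This routine picks the cheapest usable clone
   (see the "badness" scoring below) and replaces the scalar call with
   something like

     vect_a = f.simdclone (vect_b);

   The exact mangled clone name and vector ABI are target-defined; the
   names above are examples only.  */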
3969 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3970 gimple_stmt_iterator
*gsi
,
3971 gimple
**vec_stmt
, slp_tree slp_node
,
3972 stmt_vector_for_cost
*)
3977 tree vec_oprnd0
= NULL_TREE
;
3980 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3981 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3982 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3983 tree fndecl
, new_temp
;
3985 auto_vec
<simd_call_arg_info
> arginfo
;
3986 vec
<tree
> vargs
= vNULL
;
3988 tree lhs
, rtype
, ratype
;
3989 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3991 /* Is STMT a vectorizable call? */
3992 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3996 fndecl
= gimple_call_fndecl (stmt
);
3997 if (fndecl
== NULL_TREE
)
4000 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
4001 if (node
== NULL
|| node
->simd_clones
== NULL
)
4004 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4007 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4011 if (gimple_call_lhs (stmt
)
4012 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
4015 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
4017 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4019 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
4026 /* Process function arguments. */
4027 nargs
= gimple_call_num_args (stmt
);
4029 /* Bail out if the function has zero arguments. */
4033 arginfo
.reserve (nargs
, true);
4035 for (i
= 0; i
< nargs
; i
++)
4037 simd_call_arg_info thisarginfo
;
4040 thisarginfo
.linear_step
= 0;
4041 thisarginfo
.align
= 0;
4042 thisarginfo
.op
= NULL_TREE
;
4043 thisarginfo
.simd_lane_linear
= false;
4045 op
= gimple_call_arg (stmt
, i
);
4046 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
4047 &thisarginfo
.vectype
)
4048 || thisarginfo
.dt
== vect_uninitialized_def
)
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4052 "use not simple.\n");
4056 if (thisarginfo
.dt
== vect_constant_def
4057 || thisarginfo
.dt
== vect_external_def
)
4058 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
4061 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
4062 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4066 "vector mask arguments are not supported\n");
4071 /* For linear arguments, the analyze phase should have saved
4072 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4073 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
4074 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
4076 gcc_assert (vec_stmt
);
4077 thisarginfo
.linear_step
4078 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
4080 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
4081 thisarginfo
.simd_lane_linear
4082 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
4083 == boolean_true_node
);
4084 /* If loop has been peeled for alignment, we need to adjust it. */
4085 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4086 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4087 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4089 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4090 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4091 tree opt
= TREE_TYPE (thisarginfo
.op
);
4092 bias
= fold_convert (TREE_TYPE (step
), bias
);
4093 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4095 = fold_build2 (POINTER_TYPE_P (opt
)
4096 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4097 thisarginfo
.op
, bias
);
4101 && thisarginfo
.dt
!= vect_constant_def
4102 && thisarginfo
.dt
!= vect_external_def
4104 && TREE_CODE (op
) == SSA_NAME
4105 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4107 && tree_fits_shwi_p (iv
.step
))
4109 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4110 thisarginfo
.op
= iv
.base
;
4112 else if ((thisarginfo
.dt
== vect_constant_def
4113 || thisarginfo
.dt
== vect_external_def
)
4114 && POINTER_TYPE_P (TREE_TYPE (op
)))
4115 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4116 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4118 if (POINTER_TYPE_P (TREE_TYPE (op
))
4119 && !thisarginfo
.linear_step
4121 && thisarginfo
.dt
!= vect_constant_def
4122 && thisarginfo
.dt
!= vect_external_def
4125 && TREE_CODE (op
) == SSA_NAME
)
4126 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4128 arginfo
.quick_push (thisarginfo
);
4131 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4132 if (!vf
.is_constant ())
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4136 "not considering SIMD clones; not yet supported"
4137 " for variable-width vectors.\n");
4141 unsigned int badness
= 0;
4142 struct cgraph_node
*bestn
= NULL
;
4143 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4144 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4146 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4147 n
= n
->simdclone
->next_clone
)
4149 unsigned int this_badness
= 0;
4150 unsigned int num_calls
;
4151 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4152 || n
->simdclone
->nargs
!= nargs
)
4155 this_badness
+= exact_log2 (num_calls
) * 4096;
4156 if (n
->simdclone
->inbranch
)
4157 this_badness
+= 8192;
4158 int target_badness
= targetm
.simd_clone
.usable (n
);
4159 if (target_badness
< 0)
4161 this_badness
+= target_badness
* 512;
4162 /* FORNOW: Have to add code to add the mask argument. */
4163 if (n
->simdclone
->inbranch
)
4165 for (i
= 0; i
< nargs
; i
++)
4167 switch (n
->simdclone
->args
[i
].arg_type
)
4169 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4170 if (!useless_type_conversion_p
4171 (n
->simdclone
->args
[i
].orig_type
,
4172 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4174 else if (arginfo
[i
].dt
== vect_constant_def
4175 || arginfo
[i
].dt
== vect_external_def
4176 || arginfo
[i
].linear_step
)
4179 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4180 if (arginfo
[i
].dt
!= vect_constant_def
4181 && arginfo
[i
].dt
!= vect_external_def
)
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4186 if (arginfo
[i
].dt
== vect_constant_def
4187 || arginfo
[i
].dt
== vect_external_def
4188 || (arginfo
[i
].linear_step
4189 != n
->simdclone
->args
[i
].linear_step
))
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4201 case SIMD_CLONE_ARG_TYPE_MASK
:
4204 if (i
== (size_t) -1)
4206 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4211 if (arginfo
[i
].align
)
4212 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4213 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4215 if (i
== (size_t) -1)
4217 if (bestn
== NULL
|| this_badness
< badness
)
4220 badness
= this_badness
;
4227 for (i
= 0; i
< nargs
; i
++)
4228 if ((arginfo
[i
].dt
== vect_constant_def
4229 || arginfo
[i
].dt
== vect_external_def
)
4230 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4232 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4233 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4235 if (arginfo
[i
].vectype
== NULL
4236 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4237 simd_clone_subparts (arginfo
[i
].vectype
)))
4241 fndecl
= bestn
->decl
;
4242 nunits
= bestn
->simdclone
->simdlen
;
4243 ncopies
= vector_unroll_factor (vf
, nunits
);
4245 /* If the function isn't const, only allow it in simd loops where user
4246 has asserted that at least nunits consecutive iterations can be
4247 performed using SIMD instructions. */
4248 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4249 && gimple_vuse (stmt
))
4252 /* Sanity check: make sure that at least one copy of the vectorized stmt
4253 needs to be generated. */
4254 gcc_assert (ncopies
>= 1);
4256 if (!vec_stmt
) /* transformation not required. */
4258 /* When the original call is pure or const but the SIMD ABI dictates
4259 an aggregate return we will have to use a virtual definition and
4260 in a loop eventually even need to add a virtual PHI. That's
4261 not straight-forward so allow to fix this up via renaming. */
4262 if (gimple_call_lhs (stmt
)
4263 && !gimple_vdef (stmt
)
4264 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4265 vinfo
->any_known_not_updated_vssa
= true;
4266 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4267 for (i
= 0; i
< nargs
; i
++)
4268 if ((bestn
->simdclone
->args
[i
].arg_type
4269 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4270 || (bestn
->simdclone
->args
[i
].arg_type
4271 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4273 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4276 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4277 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4278 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4279 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4280 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4281 tree sll
= arginfo
[i
].simd_lane_linear
4282 ? boolean_true_node
: boolean_false_node
;
4283 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4285 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4286 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4287 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4288 dt, slp_node, cost_vec); */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4298 scalar_dest
= gimple_call_lhs (stmt
);
4299 vec_dest
= NULL_TREE
;
4304 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4305 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4306 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4309 rtype
= TREE_TYPE (ratype
);
4313 auto_vec
<vec
<tree
> > vec_oprnds
;
4314 auto_vec
<unsigned> vec_oprnds_i
;
4315 vec_oprnds
.safe_grow_cleared (nargs
, true);
4316 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4317 for (j
= 0; j
< ncopies
; ++j
)
4319 /* Build argument list for the vectorized call. */
4321 vargs
.create (nargs
);
4325 for (i
= 0; i
< nargs
; i
++)
4327 unsigned int k
, l
, m
, o
;
4329 op
= gimple_call_arg (stmt
, i
);
4330 switch (bestn
->simdclone
->args
[i
].arg_type
)
4332 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4333 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4334 o
= vector_unroll_factor (nunits
,
4335 simd_clone_subparts (atype
));
4336 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4338 if (simd_clone_subparts (atype
)
4339 < simd_clone_subparts (arginfo
[i
].vectype
))
4341 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4342 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4343 / simd_clone_subparts (atype
));
4344 gcc_assert ((k
& (k
- 1)) == 0);
4347 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4348 ncopies
* o
/ k
, op
,
4350 vec_oprnds_i
[i
] = 0;
4351 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4355 vec_oprnd0
= arginfo
[i
].op
;
4356 if ((m
& (k
- 1)) == 0)
4357 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4359 arginfo
[i
].op
= vec_oprnd0
;
4361 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4363 bitsize_int ((m
& (k
- 1)) * prec
));
4365 = gimple_build_assign (make_ssa_name (atype
),
4367 vect_finish_stmt_generation (vinfo
, stmt_info
,
4369 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4373 k
= (simd_clone_subparts (atype
)
4374 / simd_clone_subparts (arginfo
[i
].vectype
));
4375 gcc_assert ((k
& (k
- 1)) == 0);
4376 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4378 vec_alloc (ctor_elts
, k
);
4381 for (l
= 0; l
< k
; l
++)
4383 if (m
== 0 && l
== 0)
4385 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4389 vec_oprnds_i
[i
] = 0;
4390 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4393 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4394 arginfo
[i
].op
= vec_oprnd0
;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4405 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4407 = gimple_build_assign (make_ssa_name (atype
),
4409 vect_finish_stmt_generation (vinfo
, stmt_info
,
4411 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4414 vargs
.safe_push (vec_oprnd0
);
4417 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4419 = gimple_build_assign (make_ssa_name (atype
),
4421 vect_finish_stmt_generation (vinfo
, stmt_info
,
4423 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4428 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4429 vargs
.safe_push (op
);
4431 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4432 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4437 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4438 &stmts
, true, NULL_TREE
);
4442 edge pe
= loop_preheader_edge (loop
);
4443 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4444 gcc_assert (!new_bb
);
4446 if (arginfo
[i
].simd_lane_linear
)
4448 vargs
.safe_push (arginfo
[i
].op
);
4451 tree phi_res
= copy_ssa_name (op
);
4452 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4453 add_phi_arg (new_phi
, arginfo
[i
].op
,
4454 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4456 = POINTER_TYPE_P (TREE_TYPE (op
))
4457 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4458 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4459 ? sizetype
: TREE_TYPE (op
);
4461 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4463 tree tcst
= wide_int_to_tree (type
, cst
);
4464 tree phi_arg
= copy_ssa_name (op
);
4466 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4467 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4468 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4469 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4471 arginfo
[i
].op
= phi_res
;
4472 vargs
.safe_push (phi_res
);
4477 = POINTER_TYPE_P (TREE_TYPE (op
))
4478 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4479 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4480 ? sizetype
: TREE_TYPE (op
);
4482 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4484 tree tcst
= wide_int_to_tree (type
, cst
);
4485 new_temp
= make_ssa_name (TREE_TYPE (op
));
4487 = gimple_build_assign (new_temp
, code
,
4488 arginfo
[i
].op
, tcst
);
4489 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4490 vargs
.safe_push (new_temp
);
4493 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4494 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4495 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4496 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4497 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4498 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4504 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4508 || known_eq (simd_clone_subparts (rtype
), nunits
));
4510 new_temp
= create_tmp_var (ratype
);
4511 else if (useless_type_conversion_p (vectype
, rtype
))
4512 new_temp
= make_ssa_name (vec_dest
, new_call
);
4514 new_temp
= make_ssa_name (rtype
, new_call
);
4515 gimple_call_set_lhs (new_call
, new_temp
);
4517 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4518 gimple
*new_stmt
= new_call
;
4522 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4525 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4526 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4527 k
= vector_unroll_factor (nunits
,
4528 simd_clone_subparts (vectype
));
4529 gcc_assert ((k
& (k
- 1)) == 0);
4530 for (l
= 0; l
< k
; l
++)
4535 t
= build_fold_addr_expr (new_temp
);
4536 t
= build2 (MEM_REF
, vectype
, t
,
4537 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4540 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4541 bitsize_int (prec
), bitsize_int (l
* prec
));
4542 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4543 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4545 if (j
== 0 && l
== 0)
4546 *vec_stmt
= new_stmt
;
4547 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4551 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4554 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4556 unsigned int k
= (simd_clone_subparts (vectype
)
4557 / simd_clone_subparts (rtype
));
4558 gcc_assert ((k
& (k
- 1)) == 0);
4559 if ((j
& (k
- 1)) == 0)
4560 vec_alloc (ret_ctor_elts
, k
);
4564 o
= vector_unroll_factor (nunits
,
4565 simd_clone_subparts (rtype
));
4566 for (m
= 0; m
< o
; m
++)
4568 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4569 size_int (m
), NULL_TREE
, NULL_TREE
);
4570 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4572 vect_finish_stmt_generation (vinfo
, stmt_info
,
4574 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4575 gimple_assign_lhs (new_stmt
));
4577 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4580 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4581 if ((j
& (k
- 1)) != k
- 1)
4583 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4585 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4586 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4588 if ((unsigned) j
== k
- 1)
4589 *vec_stmt
= new_stmt
;
4590 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4595 tree t
= build_fold_addr_expr (new_temp
);
4596 t
= build2 (MEM_REF
, vectype
, t
,
4597 build_int_cst (TREE_TYPE (t
), 0));
4598 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4599 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4600 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4602 else if (!useless_type_conversion_p (vectype
, rtype
))
4604 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4606 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4607 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4612 *vec_stmt
= new_stmt
;
4613 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4616 for (i
= 0; i
< nargs
; ++i
)
4618 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4623 /* The call in STMT might prevent it from being removed in dce.
4624 We however cannot remove it here, due to the way the ssa name
4625 it defines is mapped to the new definition. So just replace
4626 rhs of the statement with something harmless. */
4634 type
= TREE_TYPE (scalar_dest
);
4635 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4636 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4639 new_stmt
= gimple_build_nop ();
4640 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4641 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
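/* Illustrative sketch (not from the original sources): widening a
   multiplication of two V8HI inputs (eight 16-bit lanes) to V4SI results
   is split into two halves, roughly

     lo = VEC_WIDEN_MULT_LO_EXPR <a, b>;   -- lanes 0..3 as 32-bit
     hi = VEC_WIDEN_MULT_HI_EXPR <a, b>;   -- lanes 4..7 as 32-bit

   This helper builds one such half; its caller invokes it twice with the
   two widening tree codes returned by supportable_widening_operation.  */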
static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
					     multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR);
    }

  vec_dsts.quick_push (vec_dest);
}
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */
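/* Illustrative sketch (not from the original sources): promoting a V16QI
   operand to V4SI results takes two rounds of unpacking, e.g.

     step 1:  VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR : V16QI -> 2 x V8HI
     step 2:  VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR : V8HI  -> 2 x V4SI

   Each invocation of the function below produces the lo/hi pair for one
   level; the caller in vectorizable_conversion walks the intermediate
   types, so the number of operand vectors doubles per level.  */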
static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */
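/* Illustrative sketch (not from the original sources): on a target where a
   widening operation such as V4SI = V4HI-input + V4SI is supported as a
   "half widening" form, the narrow operand is first converted in place,
   roughly

     t1  = (V4SI) vop0;     -- NOP_EXPR widening of the narrow input
     res = t1 OP vop1;      -- plain full-width vector operation

   so one vector statement is produced per copy instead of the usual
   lo/hi pair built by the routine above.  */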
static void
vect_create_half_widening_stmts (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1,
				 stmt_vec_info stmt_info, tree vec_dest,
				 gimple_stmt_iterator *gsi,
				 enum tree_code code1,
				 int op_type)
{
  int i;
  tree vop0, vop1;
  gimple *new_stmt1;
  gimple *new_stmt2;
  gimple *new_stmt3;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length ());
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      tree new_tmp1, new_tmp2, new_tmp3, out_type;

      gcc_assert (op_type == binary_op);
      vop1 = (*vec_oprnds1)[i];

      /* Widen the first vector input.  */
      out_type = TREE_TYPE (vec_dest);
      new_tmp1 = make_ssa_name (out_type);
      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
	{
	  /* Widen the second vector input.  */
	  new_tmp2 = make_ssa_name (out_type);
	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
	  /* Perform the operation.  With both vector inputs widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
	}
      else
	{
	  /* Perform the operation.  With the single vector input widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
	}

      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
      gimple_assign_set_lhs (new_stmt3, new_tmp3);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp3);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
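/* Illustrative sketch (not from the original sources): the three MODIFIER
   cases below roughly correspond to

     NONE   : float -> int   with equal lane counts, one stmt per copy;
     WIDEN  : short -> int   where one input vector unpacks into two wider
              result vectors (possibly through intermediate types);
     NARROW : int -> short   where two input vectors pack into one narrower
              result vector.

   The element types named here are examples only; the real classification
   is driven by the nunits_in/nunits_out comparison computed below.  */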
4858 vectorizable_conversion (vec_info
*vinfo
,
4859 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4860 gimple
**vec_stmt
, slp_tree slp_node
,
4861 stmt_vector_for_cost
*cost_vec
)
4865 tree op0
, op1
= NULL_TREE
;
4866 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4867 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4868 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4870 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4872 poly_uint64 nunits_in
;
4873 poly_uint64 nunits_out
;
4874 tree vectype_out
, vectype_in
;
4876 tree lhs_type
, rhs_type
;
4877 enum { NARROW
, NONE
, WIDEN
} modifier
;
4878 vec
<tree
> vec_oprnds0
= vNULL
;
4879 vec
<tree
> vec_oprnds1
= vNULL
;
4881 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4882 int multi_step_cvt
= 0;
4883 vec
<tree
> interm_types
= vNULL
;
4884 tree intermediate_type
, cvt_type
= NULL_TREE
;
4886 unsigned short fltsz
;
4888 /* Is STMT a vectorizable conversion? */
4890 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4893 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4897 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4901 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4904 code
= gimple_assign_rhs_code (stmt
);
4905 if (!CONVERT_EXPR_CODE_P (code
)
4906 && code
!= FIX_TRUNC_EXPR
4907 && code
!= FLOAT_EXPR
4908 && code
!= WIDEN_PLUS_EXPR
4909 && code
!= WIDEN_MINUS_EXPR
4910 && code
!= WIDEN_MULT_EXPR
4911 && code
!= WIDEN_LSHIFT_EXPR
)
4914 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4915 || code
== WIDEN_MINUS_EXPR
4916 || code
== WIDEN_MULT_EXPR
4917 || code
== WIDEN_LSHIFT_EXPR
);
4918 op_type
= TREE_CODE_LENGTH (code
);
4920 /* Check types of lhs and rhs. */
4921 scalar_dest
= gimple_assign_lhs (stmt
);
4922 lhs_type
= TREE_TYPE (scalar_dest
);
4923 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4925 /* Check the operands of the operation. */
4926 slp_tree slp_op0
, slp_op1
= NULL
;
4927 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4928 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4930 if (dump_enabled_p ())
4931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4932 "use not simple.\n");
4936 rhs_type
= TREE_TYPE (op0
);
4937 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4938 && !((INTEGRAL_TYPE_P (lhs_type
)
4939 && INTEGRAL_TYPE_P (rhs_type
))
4940 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4941 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4944 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4945 && ((INTEGRAL_TYPE_P (lhs_type
)
4946 && !type_has_mode_precision_p (lhs_type
))
4947 || (INTEGRAL_TYPE_P (rhs_type
)
4948 && !type_has_mode_precision_p (rhs_type
))))
4950 if (dump_enabled_p ())
4951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4952 "type conversion to/from bit-precision unsupported."
4957 if (op_type
== binary_op
)
4959 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4960 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4962 op1
= gimple_assign_rhs2 (stmt
);
4964 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4965 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4967 if (dump_enabled_p ())
4968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4969 "use not simple.\n");
4972 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4975 vectype_in
= vectype1_in
;
4978 /* If op0 is an external or constant def, infer the vector type
4979 from the scalar type. */
4981 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4983 gcc_assert (vectype_in
);
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4988 "no vectype for scalar type %T\n", rhs_type
);
4993 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4994 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4998 "can't convert between boolean and non "
4999 "boolean vectors %T\n", rhs_type
);
5004 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5005 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5006 if (known_eq (nunits_out
, nunits_in
))
5011 else if (multiple_p (nunits_out
, nunits_in
))
5015 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5019 /* Multiple types in SLP are handled by creating the appropriate number of
5020 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5024 else if (modifier
== NARROW
)
5025 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5027 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5029 /* Sanity check: make sure that at least one copy of the vectorized stmt
5030 needs to be generated. */
5031 gcc_assert (ncopies
>= 1);
5033 bool found_mode
= false;
5034 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5035 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5036 opt_scalar_mode rhs_mode_iter
;
5038 /* Supportable by target? */
5042 if (code
!= FIX_TRUNC_EXPR
5043 && code
!= FLOAT_EXPR
5044 && !CONVERT_EXPR_CODE_P (code
))
5046 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5052 "conversion not supported by target.\n");
5056 if (known_eq (nunits_in
, nunits_out
))
5058 if (!supportable_half_widening_operation (code
, vectype_out
,
5059 vectype_in
, &code1
))
5061 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5064 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5065 vectype_out
, vectype_in
, &code1
,
5066 &code2
, &multi_step_cvt
,
5069 /* Binary widening operation can only be supported directly by the
5071 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5075 if (code
!= FLOAT_EXPR
5076 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5079 fltsz
= GET_MODE_SIZE (lhs_mode
);
5080 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5082 rhs_mode
= rhs_mode_iter
.require ();
5083 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5087 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5088 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5089 if (cvt_type
== NULL_TREE
)
5092 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5094 if (!supportable_convert_operation (code
, vectype_out
,
5095 cvt_type
, &codecvt1
))
5098 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5099 vectype_out
, cvt_type
,
5100 &codecvt1
, &codecvt2
,
5105 gcc_assert (multi_step_cvt
== 0);
5107 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5109 vectype_in
, &code1
, &code2
,
5110 &multi_step_cvt
, &interm_types
))
5120 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5121 codecvt2
= ERROR_MARK
;
5125 interm_types
.safe_push (cvt_type
);
5126 cvt_type
= NULL_TREE
;
5131 gcc_assert (op_type
== unary_op
);
5132 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5133 &code1
, &multi_step_cvt
,
5137 if (code
!= FIX_TRUNC_EXPR
5138 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5143 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5144 if (cvt_type
== NULL_TREE
)
5146 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5149 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5150 &code1
, &multi_step_cvt
,
5159 if (!vec_stmt
) /* transformation not required. */
5162 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5163 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5167 "incompatible vector types for invariants\n");
5170 DUMP_VECT_SCOPE ("vectorizable_conversion");
5171 if (modifier
== NONE
)
5173 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5174 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5177 else if (modifier
== NARROW
)
5179 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5180 /* The final packing step produces one vector result per copy. */
5181 unsigned int nvectors
5182 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5183 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5184 multi_step_cvt
, cost_vec
,
5189 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5190 /* The initial unpacking step produces two vector results
5191 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5192 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5193 unsigned int nvectors
5195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5197 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5198 multi_step_cvt
, cost_vec
,
5201 interm_types
.release ();
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE
, vect_location
,
5208 "transform conversion. ncopies = %d.\n", ncopies
);
5210 if (op_type
== binary_op
)
5212 if (CONSTANT_CLASS_P (op0
))
5213 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5214 else if (CONSTANT_CLASS_P (op1
))
5215 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5218 /* In case of multi-step conversion, we first generate conversion operations
5219 to the intermediate types, and then from that types to the final one.
5220 We create vector destinations for the intermediate type (TYPES) received
5221 from supportable_*_operation, and store them in the correct order
5222 for future use in vect_create_vectorized_*_stmts (). */
5223 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5224 vec_dest
= vect_create_destination_var (scalar_dest
,
5225 (cvt_type
&& modifier
== WIDEN
)
5226 ? cvt_type
: vectype_out
);
5227 vec_dsts
.quick_push (vec_dest
);
5231 for (i
= interm_types
.length () - 1;
5232 interm_types
.iterate (i
, &intermediate_type
); i
--)
5234 vec_dest
= vect_create_destination_var (scalar_dest
,
5236 vec_dsts
.quick_push (vec_dest
);
5241 vec_dest
= vect_create_destination_var (scalar_dest
,
5243 ? vectype_out
: cvt_type
);
5248 if (modifier
== WIDEN
)
5250 else if (modifier
== NARROW
)
5253 ninputs
= vect_pow2 (multi_step_cvt
);
5261 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5263 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5265 /* Arguments are ready, create the new vector stmt. */
5266 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5267 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5268 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5269 gimple_assign_set_lhs (new_stmt
, new_temp
);
5270 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5273 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5275 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5280 /* In case the vectorization factor (VF) is bigger than the number
5281 of elements that we can fit in a vectype (nunits), we have to
5282 generate more than one vector stmt - i.e - we need to "unroll"
5283 the vector stmt by a factor VF/nunits. */
5284 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5286 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5288 if (code
== WIDEN_LSHIFT_EXPR
)
5290 int oprnds_size
= vec_oprnds0
.length ();
5291 vec_oprnds1
.create (oprnds_size
);
5292 for (i
= 0; i
< oprnds_size
; ++i
)
5293 vec_oprnds1
.quick_push (op1
);
5295 /* Arguments are ready. Create the new vector stmts. */
5296 for (i
= multi_step_cvt
; i
>= 0; i
--)
5298 tree this_dest
= vec_dsts
[i
];
5299 enum tree_code c1
= code1
, c2
= code2
;
5300 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5305 if (known_eq (nunits_out
, nunits_in
))
5306 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5307 &vec_oprnds1
, stmt_info
,
5311 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5312 &vec_oprnds1
, stmt_info
,
5317 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5322 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5323 new_temp
= make_ssa_name (vec_dest
);
5324 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5325 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5328 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5331 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5333 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5338 /* In case the vectorization factor (VF) is bigger than the number
5339 of elements that we can fit in a vectype (nunits), we have to
5340 generate more than one vector stmt - i.e - we need to "unroll"
5341 the vector stmt by a factor VF/nunits. */
5342 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5344 /* Arguments are ready. Create the new vector stmts. */
5346 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5348 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5349 new_temp
= make_ssa_name (vec_dest
);
5351 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5352 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5353 vec_oprnds0
[i
] = new_temp
;
5356 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5358 stmt_info
, vec_dsts
, gsi
,
5363 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5365 vec_oprnds0
.release ();
5366 vec_oprnds1
.release ();
5367 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */
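/* Illustrative note (an assumption-based example, not from the original
   sources): a statement such as

     _2 = (unsigned int) _1;          -- int -> unsigned int, same precision

   is a nop conversion (tree_nop_conversion_p), so neither the scalar nor
   the vector form emits an instruction and costing can treat it as free.
   A genuine narrowing like int -> short is not a nop and is costed
   normally.  */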
bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
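/* Illustrative sketch (not from the original sources): the copies handled
   here are plain moves and mode-preserving conversions, e.g.

     a_1 = b_2;                       -- simple SSA copy
     a_1 = (unsigned int) b_2;        -- same-size conversion

   Both become a single vector-to-vector assignment; when the source and
   destination vector types differ only in signedness or element spelling,
   the operand is wrapped in a VIEW_CONVERT_EXPR rather than converted.  */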
5404 vectorizable_assignment (vec_info
*vinfo
,
5405 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5406 gimple
**vec_stmt
, slp_tree slp_node
,
5407 stmt_vector_for_cost
*cost_vec
)
5412 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5414 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5418 vec
<tree
> vec_oprnds
= vNULL
;
5420 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5421 enum tree_code code
;
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5427 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5431 /* Is vectorizable assignment? */
5432 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5436 scalar_dest
= gimple_assign_lhs (stmt
);
5437 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5440 if (STMT_VINFO_DATA_REF (stmt_info
))
5443 code
= gimple_assign_rhs_code (stmt
);
5444 if (!(gimple_assign_single_p (stmt
)
5445 || code
== PAREN_EXPR
5446 || CONVERT_EXPR_CODE_P (code
)))
5449 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5450 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5458 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5460 gcc_assert (ncopies
>= 1);
5463 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5464 &dt
[0], &vectype_in
))
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5468 "use not simple.\n");
5472 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5474 /* We can handle NOP_EXPR conversions that do not change the number
5475 of elements or the vector size. */
5476 if ((CONVERT_EXPR_CODE_P (code
)
5477 || code
== VIEW_CONVERT_EXPR
)
5479 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5480 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5481 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5484 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5485 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5489 "can't convert between boolean and non "
5490 "boolean vectors %T\n", TREE_TYPE (op
));
5495 /* We do not handle bit-precision changes. */
5496 if ((CONVERT_EXPR_CODE_P (code
)
5497 || code
== VIEW_CONVERT_EXPR
)
5498 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5499 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5500 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5501 /* But a conversion that does not change the bit-pattern is ok. */
5502 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5503 > TYPE_PRECISION (TREE_TYPE (op
)))
5504 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5506 if (dump_enabled_p ())
5507 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5508 "type conversion to/from bit-precision "
5513 if (!vec_stmt
) /* transformation not required. */
5516 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5520 "incompatible vector types for invariants\n");
5523 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5524 DUMP_VECT_SCOPE ("vectorizable_assignment");
5525 if (!vect_nop_conversion_p (stmt_info
))
5526 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5536 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5539 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5541 /* Arguments are ready. create the new vector stmt. */
5542 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5544 if (CONVERT_EXPR_CODE_P (code
)
5545 || code
== VIEW_CONVERT_EXPR
)
5546 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5547 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5548 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5549 gimple_assign_set_lhs (new_stmt
, new_temp
);
5550 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5552 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5554 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5557 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5559 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
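/* Illustrative usage sketch (hypothetical caller, not from the original
   sources): pattern recognizers ask this question before rewriting a
   scalar shift, e.g.

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... build the pattern statement ...

   The helper below answers it by probing optab_scalar first and falling
   back to optab_vector, mirroring the strategy used in
   vectorizable_shift.  */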
bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
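/* Illustrative sketch (not from the original sources): the key analysis
   below is whether the shift amount is the same scalar for every lane,

     x[i] = a[i] << 3;        -- invariant amount, vector/scalar optab
     x[i] = a[i] << b[i];     -- per-lane amounts, vector/vector optab

   A constant or loop-invariant amount prefers the vector-shifted-by-scalar
   optab; a loop-varying amount forces the vector-shifted-by-vector form,
   and an invariant count may still need converting to the element type of
   the shifted vector, since shift counts are plain ints in GIMPLE.  */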
5608 vectorizable_shift (vec_info
*vinfo
,
5609 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5610 gimple
**vec_stmt
, slp_tree slp_node
,
5611 stmt_vector_for_cost
*cost_vec
)
5615 tree op0
, op1
= NULL
;
5616 tree vec_oprnd1
= NULL_TREE
;
5618 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5619 enum tree_code code
;
5620 machine_mode vec_mode
;
5624 machine_mode optab_op2_mode
;
5625 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5627 poly_uint64 nunits_in
;
5628 poly_uint64 nunits_out
;
5633 vec
<tree
> vec_oprnds0
= vNULL
;
5634 vec
<tree
> vec_oprnds1
= vNULL
;
5637 bool scalar_shift_arg
= true;
5638 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5639 bool incompatible_op1_vectype_p
= false;
5641 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5644 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5645 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5649 /* Is STMT a vectorizable binary/unary operation? */
5650 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5654 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5657 code
= gimple_assign_rhs_code (stmt
);
5659 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5660 || code
== RROTATE_EXPR
))
5663 scalar_dest
= gimple_assign_lhs (stmt
);
5664 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5665 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5669 "bit-precision shifts not supported.\n");
5674 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5675 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5679 "use not simple.\n");
5682 /* If op0 is an external or constant def, infer the vector type
5683 from the scalar type. */
5685 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5687 gcc_assert (vectype
);
5690 if (dump_enabled_p ())
5691 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5692 "no vectype for scalar type\n");
5696 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5697 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5698 if (maybe_ne (nunits_out
, nunits_in
))
5701 stmt_vec_info op1_def_stmt_info
;
5703 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5704 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5708 "use not simple.\n");
5712 /* Multiple types in SLP are handled by creating the appropriate number of
5713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5718 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5720 gcc_assert (ncopies
>= 1);
5722 /* Determine whether the shift amount is a vector, or scalar. If the
5723 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5725 if ((dt
[1] == vect_internal_def
5726 || dt
[1] == vect_induction_def
5727 || dt
[1] == vect_nested_cycle
)
5729 scalar_shift_arg
= false;
5730 else if (dt
[1] == vect_constant_def
5731 || dt
[1] == vect_external_def
5732 || dt
[1] == vect_internal_def
)
5734 /* In SLP, need to check whether the shift count is the same,
5735 in loops if it is a constant or invariant, it is always
5739 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5740 stmt_vec_info slpstmt_info
;
5742 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5744 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5745 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5746 scalar_shift_arg
= false;
5749 /* For internal SLP defs we have to make sure we see scalar stmts
5750 for all vector elements.
5751 ??? For different vectors we could resort to a different
5752 scalar shift operand but code-generation below simply always
5754 if (dt
[1] == vect_internal_def
5755 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5757 scalar_shift_arg
= false;
5760 /* If the shift amount is computed by a pattern stmt we cannot
5761 use the scalar amount directly thus give up and use a vector
5763 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5764 scalar_shift_arg
= false;
5768 if (dump_enabled_p ())
5769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5770 "operand mode requires invariant argument.\n");
5774 /* Vector shifted by vector. */
5775 bool was_scalar_shift_arg
= scalar_shift_arg
;
5776 if (!scalar_shift_arg
)
5778 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE
, vect_location
,
5781 "vector/vector shift/rotate found.\n");
5784 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5786 incompatible_op1_vectype_p
5787 = (op1_vectype
== NULL_TREE
5788 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5789 TYPE_VECTOR_SUBPARTS (vectype
))
5790 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5791 if (incompatible_op1_vectype_p
5793 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5794 || slp_op1
->refcnt
!= 1))
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5798 "unusable type for last operand in"
5799 " vector/vector shift/rotate.\n");
5803 /* See if the machine has a vector shifted by scalar insn and if not
5804 then see if it has a vector shifted by vector insn. */
5807 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5809 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE
, vect_location
,
5813 "vector/scalar shift/rotate found.\n");
5817 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5819 && (optab_handler (optab
, TYPE_MODE (vectype
))
5820 != CODE_FOR_nothing
))
5822 scalar_shift_arg
= false;
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE
, vect_location
,
5826 "vector/vector shift/rotate found.\n");
5829 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5833 /* Unlike the other binary operators, shifts/rotates have
5834 the rhs being int, instead of the same type as the lhs,
5835 so make sure the scalar is the right type if we are
5836 dealing with vectors of long long/long/short/char. */
5837 incompatible_op1_vectype_p
5839 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5841 if (incompatible_op1_vectype_p
5842 && dt
[1] == vect_internal_def
)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5846 "unusable type for last operand in"
5847 " vector/vector shift/rotate.\n");
5854 /* Supportable by target? */
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5862 vec_mode
= TYPE_MODE (vectype
);
5863 icode
= (int) optab_handler (optab
, vec_mode
);
5864 if (icode
== CODE_FOR_nothing
)
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5868 "op not supported by target.\n");
5871 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5872 if (vect_emulated_vector_p (vectype
))
5875 if (!vec_stmt
) /* transformation not required. */
5878 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5879 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5880 && (!incompatible_op1_vectype_p
5881 || dt
[1] == vect_constant_def
)
5882 && !vect_maybe_update_slp_op_vectype
5884 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5888 "incompatible vector types for invariants\n");
5891 /* Now adjust the constant shift amount in place. */
5893 && incompatible_op1_vectype_p
5894 && dt
[1] == vect_constant_def
)
5896 for (unsigned i
= 0;
5897 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5899 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5900 = fold_convert (TREE_TYPE (vectype
),
5901 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5902 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5906 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5907 DUMP_VECT_SCOPE ("vectorizable_shift");
5908 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5909 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_NOTE
, vect_location
,
5917 "transform binary/unary operation.\n");
5919 if (incompatible_op1_vectype_p
&& !slp_node
)
5921 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5922 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5923 if (dt
[1] != vect_constant_def
)
5924 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5925 TREE_TYPE (vectype
), NULL
);
5929 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5931 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
      /* Vector shl and shr insn patterns can be defined with scalar
	 operand 2 (shift operand).  In this case, use constant or loop
	 invariant op1 directly, without extending it to vector mode
	 first.  */
5937 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5938 if (!VECTOR_MODE_P (optab_op2_mode
))
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE
, vect_location
,
5942 "operand 1 using scalar mode.\n");
5944 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5945 vec_oprnds1
.quick_push (vec_oprnd1
);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
5952 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5953 vec_oprnds1
.quick_push (vec_oprnd1
);
5956 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5958 if (was_scalar_shift_arg
)
5960 /* If the argument was the same in all lanes create
5961 the correctly typed vector shift amount directly. */
5962 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5963 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5964 !loop_vinfo
? gsi
: NULL
);
5965 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5966 !loop_vinfo
? gsi
: NULL
);
5967 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5968 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5969 vec_oprnds1
.quick_push (vec_oprnd1
);
5971 else if (dt
[1] == vect_constant_def
)
5972 /* The constant shift amount has been adjusted in place. */
5975 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5978 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5979 (a special case for certain kind of vector shifts); otherwise,
5980 operand 1 should be of a vector type (the usual case). */
5981 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5983 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5985 /* Arguments are ready. Create the new vector stmt. */
5986 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5988 /* For internal defs where we need to use a scalar shift arg
5989 extract the first lane. */
5990 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5992 vop1
= vec_oprnds1
[0];
5993 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5995 = gimple_build_assign (new_temp
,
5996 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5998 TYPE_SIZE (TREE_TYPE (new_temp
)),
5999 bitsize_zero_node
));
6000 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6004 vop1
= vec_oprnds1
[i
];
6005 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
6006 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6007 gimple_assign_set_lhs (new_stmt
, new_temp
);
6008 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6010 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6012 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6016 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6018 vec_oprnds0
.release ();
6019 vec_oprnds1
.release ();
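
/* Illustrative sketch, not part of the vectorizer itself: the two shift
   forms distinguished by the analysis above.  For a constant or
   loop-invariant amount S, e.g.

     for (int i = 0; i < n; i++)
       a[i] = a[i] << s;

   scalar_shift_arg stays true and, if the target provides the
   vector-by-scalar optab, each copy is conceptually

     vect_a = MEM <vector(4) int> [&a[i]];
     vect_r = vect_a << s;

   A per-iteration amount (an internal def such as b[i]) forces the
   vector-by-vector form

     vect_r = vect_a << vect_b;

   which is why the code above retries with optab_vector and, when a
   scalar amount is still required for an internal def, extracts lane 0
   via BIT_FIELD_REF.  */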
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
6041 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
6043 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6044 enum tree_code code
, orig_code
;
6045 machine_mode vec_mode
;
6049 bool target_support_p
;
6050 enum vect_def_type dt
[3]
6051 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
6053 poly_uint64 nunits_in
;
6054 poly_uint64 nunits_out
;
6056 int ncopies
, vec_num
;
6058 vec
<tree
> vec_oprnds0
= vNULL
;
6059 vec
<tree
> vec_oprnds1
= vNULL
;
6060 vec
<tree
> vec_oprnds2
= vNULL
;
6061 tree vop0
, vop1
, vop2
;
6062 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6064 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6067 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6071 /* Is STMT a vectorizable binary/unary operation? */
6072 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6076 /* Loads and stores are handled in vectorizable_{load,store}. */
6077 if (STMT_VINFO_DATA_REF (stmt_info
))
6080 orig_code
= code
= gimple_assign_rhs_code (stmt
);
6082 /* Shifts are handled in vectorizable_shift. */
6083 if (code
== LSHIFT_EXPR
6084 || code
== RSHIFT_EXPR
6085 || code
== LROTATE_EXPR
6086 || code
== RROTATE_EXPR
)
6089 /* Comparisons are handled in vectorizable_comparison. */
6090 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
6093 /* Conditions are handled in vectorizable_condition. */
6094 if (code
== COND_EXPR
)
6097 /* For pointer addition and subtraction, we should use the normal
6098 plus and minus for the vector operation. */
6099 if (code
== POINTER_PLUS_EXPR
)
6101 if (code
== POINTER_DIFF_EXPR
)
6104 /* Support only unary or binary operations. */
6105 op_type
= TREE_CODE_LENGTH (code
);
6106 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6108 if (dump_enabled_p ())
6109 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6110 "num. args = %d (not unary/binary/ternary op).\n",
6115 scalar_dest
= gimple_assign_lhs (stmt
);
6116 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
6120 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6122 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6123 /* Exception are bitwise binary operations. */
6124 && code
!= BIT_IOR_EXPR
6125 && code
!= BIT_XOR_EXPR
6126 && code
!= BIT_AND_EXPR
)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6130 "bit-precision arithmetic not supported.\n");
6135 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6136 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6140 "use not simple.\n");
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
6152 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6158 "not supported operation on bool value.\n");
6161 vectype
= vectype_out
;
6164 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6168 gcc_assert (vectype
);
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6173 "no vectype for scalar type %T\n",
6179 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6180 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6181 if (maybe_ne (nunits_out
, nunits_in
))
6184 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6185 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6186 if (op_type
== binary_op
|| op_type
== ternary_op
)
6188 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6189 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6193 "use not simple.\n");
6197 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype2
)))
6200 if (op_type
== ternary_op
)
6202 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6203 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6207 "use not simple.\n");
6211 && maybe_ne (nunits_out
, TYPE_VECTOR_SUBPARTS (vectype3
)))
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
6221 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6225 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6229 gcc_assert (ncopies
>= 1);
6231 /* Reject attempts to combine mask types with nonmask types, e.g. if
6232 we have an AND between a (nonmask) boolean loaded from memory and
6233 a (mask) boolean result of a comparison.
6235 TODO: We could easily fix these cases up using pattern statements. */
6236 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6237 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6238 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6242 "mixed mask and nonmask vector types\n");
6246 /* Supportable by target? */
6248 vec_mode
= TYPE_MODE (vectype
);
6249 if (code
== MULT_HIGHPART_EXPR
)
6250 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6253 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6261 target_support_p
= (optab_handler (optab
, vec_mode
)
6262 != CODE_FOR_nothing
);
6265 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6266 if (!target_support_p
)
6268 if (dump_enabled_p ())
6269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6270 "op not supported by target.\n");
6271 /* Check only during analysis. */
6272 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6273 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_NOTE
, vect_location
,
6277 "proceeding using word mode.\n");
6278 using_emulated_vectors_p
= true;
6281 if (using_emulated_vectors_p
6282 && !vect_can_vectorize_without_simd_p (code
))
6284 if (dump_enabled_p ())
6285 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6289 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6290 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6291 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6293 if (!vec_stmt
) /* transformation not required. */
6295 /* If this operation is part of a reduction, a fully-masked loop
6296 should only change the active lanes of the reduction chain,
6297 keeping the inactive lanes as-is. */
6299 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6302 if (cond_fn
== IFN_LAST
6303 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6304 OPTIMIZE_FOR_SPEED
))
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6308 "can't use a fully-masked loop because no"
6309 " conditional operation is available.\n");
6310 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6313 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6317 /* Put types on constant and invariant SLP children. */
6319 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6320 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6321 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6325 "incompatible vector types for invariants\n");
6329 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6330 DUMP_VECT_SCOPE ("vectorizable_operation");
6331 vect_model_simple_cost (vinfo
, stmt_info
,
6332 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6333 if (using_emulated_vectors_p
)
6335 /* The above vect_model_simple_cost call handles constants
6336 in the prologue and (mis-)costs one of the stmts as
6337 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6338 for the actual lowering that will be applied. */
6340 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6354 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_NOTE
, vect_location
,
6363 "transform binary/unary operation.\n");
6365 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6367 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6368 vectors with unsigned elements, but the result is signed. So, we
6369 need to compute the MINUS_EXPR into vectype temporary and
6370 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6371 tree vec_cvt_dest
= NULL_TREE
;
6372 if (orig_code
== POINTER_DIFF_EXPR
)
6374 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6375 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6379 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
6434 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6435 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6436 /* Arguments are ready. Create the new vector stmt. */
6437 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6439 gimple
*new_stmt
= NULL
;
6440 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6441 ? vec_oprnds1
[i
] : NULL_TREE
);
6442 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6443 if (masked_loop_p
&& reduc_idx
>= 0)
6445 /* Perform the operation on active elements only and take
6446 inactive elements from the reduction chain input. */
6448 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6449 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6451 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6453 new_temp
= make_ssa_name (vec_dest
, call
);
6454 gimple_call_set_lhs (call
, new_temp
);
6455 gimple_call_set_nothrow (call
, true);
6456 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6461 tree mask
= NULL_TREE
;
6462 /* When combining two masks check if either of them is elsewhere
6463 combined with a loop mask, if that's the case we can mark that the
6464 new combined mask doesn't need to be combined with a loop mask. */
6466 && code
== BIT_AND_EXPR
6467 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6469 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6472 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6475 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6479 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6482 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6485 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6490 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6491 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6492 gimple_assign_set_lhs (new_stmt
, new_temp
);
6493 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6494 if (using_emulated_vectors_p
)
6495 suppress_warning (new_stmt
, OPT_Wvector_operation_performance
);
6497 /* Enter the combined value into the vector cond hash so we don't
6498 AND it with a loop mask again. */
6500 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6504 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6505 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6507 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6508 gimple_assign_set_lhs (new_stmt
, new_temp
);
6509 vect_finish_stmt_generation (vinfo
, stmt_info
,
6514 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6516 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6520 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6522 vec_oprnds0
.release ();
6523 vec_oprnds1
.release ();
6524 vec_oprnds2
.release ();
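
/* Illustrative sketch, not part of the vectorizer itself: the
   masked-reduction path above.  For

     for (int i = 0; i < n; i++)
       sum += a[i];

   in a fully-masked loop, the plain vector add is replaced by the
   conditional internal function chosen via get_conditional_internal_fn
   (here .COND_ADD), with the reduction chain input as the "else" value so
   that inactive lanes are passed through unchanged:

     vect_sum = .COND_ADD (loop_mask, vect_sum, vect_a, vect_sum);

   This is what the gimple_build_call_internal (cond_fn, 4, ...) above
   emits when masked_loop_p and reduc_idx >= 0.  */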
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
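
/* Illustrative sketch, not part of the vectorizer itself: a grouped store
   whose members carry different alias sets, e.g.

     struct s { int i; float f; } *p;
     p[j].i = 0;
     p[j].f = 1.0f;

   Neither member's alias type is valid for the single combined vector
   store, so get_group_alias_ptr_type falls back to ptr_type_node
   (alias set 0), which conflicts with everything.  */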
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
6598 tree ref
[2] = { ref1
, ref2
};
6599 poly_int64 bitsize
[2], bitpos
[2];
6600 tree offset
[2], base
[2];
6601 for (int i
= 0; i
< 2; ++i
)
6604 int unsignedp
, reversep
, volatilep
= 0;
6605 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6606 &offset
[i
], &mode
, &unsignedp
,
6607 &reversep
, &volatilep
);
6608 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6610 if (TREE_CODE (base
[i
]) == MEM_REF
6611 && offset
[i
] == NULL_TREE
6612 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6614 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6615 if (is_gimple_assign (def_stmt
)
6616 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6617 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6618 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6620 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6622 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6623 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6628 if (!operand_equal_p (base
[0], base
[1], 0))
6630 if (maybe_ne (bitsize
[0], bitsize
[1]))
6632 if (offset
[0] != offset
[1])
6634 if (!offset
[0] || !offset
[1])
6636 if (!operand_equal_p (offset
[0], offset
[1], 0))
6639 for (int i
= 0; i
< 2; ++i
)
6641 step
[i
] = integer_one_node
;
6642 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6644 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6645 if (is_gimple_assign (def_stmt
)
6646 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6647 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6650 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6651 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6654 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6656 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6657 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6659 tree rhs1
= NULL_TREE
;
6660 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6662 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6663 if (gimple_assign_cast_p (def_stmt
))
6664 rhs1
= gimple_assign_rhs1 (def_stmt
);
6666 else if (CONVERT_EXPR_P (offset
[i
]))
6667 rhs1
= TREE_OPERAND (offset
[i
], 0);
6669 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6670 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6671 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6672 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6675 if (!operand_equal_p (offset
[0], offset
[1], 0)
6676 || !operand_equal_p (step
[0], step
[1], 0))
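
/* Illustrative sketch, not part of the vectorizer itself: the references
   compared above are the "omp simd array" accesses built by OpenMP
   lowering, e.g. D.2042[_21].  get_inner_reference decomposes each one
   into BASE (the array, possibly reached through a POINTER_PLUS_EXPR of
   its address), a variable OFFSET and a constant bit position, and a
   multiplication by the element size is peeled off OFFSET into STEP.  The
   two refs are treated as equal only if BASE, OFFSET and STEP all compare
   operand_equal_p, modulo nop conversions of OFFSET.  */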
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
6706 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6707 unsigned HOST_WIDE_INT nunits
;
6708 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6710 int units_log2
= exact_log2 (nunits
);
6711 if (units_log2
<= 0)
6715 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6716 for (i
= 0; i
<= units_log2
; ++i
)
6718 unsigned HOST_WIDE_INT j
, k
;
6719 enum scan_store_kind kind
= scan_store_kind_perm
;
6720 vec_perm_builder
sel (nunits
, nunits
, 1);
6721 sel
.quick_grow (nunits
);
6722 if (i
== units_log2
)
6724 for (j
= 0; j
< nunits
; ++j
)
6725 sel
[j
] = nunits
- 1;
6729 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6731 for (k
= 0; j
< nunits
; ++j
, ++k
)
6732 sel
[j
] = nunits
+ k
;
6734 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6735 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
6737 if (i
== units_log2
)
6740 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6742 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6744 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6745 /* Whole vector shifts shift in zeros, so if init is all zero
6746 constant, there is no need to do anything further. */
6747 if ((TREE_CODE (init
) != INTEGER_CST
6748 && TREE_CODE (init
) != REAL_CST
)
6749 || !initializer_zerop (init
))
6751 tree masktype
= truth_type_for (vectype
);
6752 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6754 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6757 kind
= whole_vector_shift_kind
;
6759 if (use_whole_vector
)
6761 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6762 use_whole_vector
->safe_grow_cleared (i
, true);
6763 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6764 use_whole_vector
->safe_push (kind
);
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
6781 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6782 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6785 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6788 || memory_access_type
!= VMAT_CONTIGUOUS
6789 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6790 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6791 || loop_vinfo
== NULL
6792 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6793 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6794 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6795 || !integer_zerop (DR_INIT (dr_info
->dr
))
6796 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6797 || !alias_sets_conflict_p (get_alias_set (vectype
),
6798 get_alias_set (TREE_TYPE (ref_type
))))
6800 if (dump_enabled_p ())
6801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6802 "unsupported OpenMP scan store.\n");
6806 /* We need to pattern match code built by OpenMP lowering and simplified
6807 by following optimizations into something we can handle.
6808 #pragma omp simd reduction(inscan,+:r)
6812 #pragma omp scan inclusive (r)
6815 shall have body with:
6816 // Initialization for input phase, store the reduction initializer:
6817 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6818 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6820 // Actual input phase:
6822 r.0_5 = D.2042[_20];
6825 // Initialization for scan phase:
6826 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6832 // Actual scan phase:
6834 r.1_8 = D.2042[_20];
6836 The "omp simd array" variable D.2042 holds the privatized copy used
6837 inside of the loop and D.2043 is another one that holds copies of
6838 the current original list item. The separate GOMP_SIMD_LANE ifn
6839 kinds are there in order to allow optimizing the initializer store
6840 and combiner sequence, e.g. if it is originally some C++ish user
6841 defined reduction, but allow the vectorizer to pattern recognize it
6842 and turn into the appropriate vectorized scan.
6844 For exclusive scan, this is slightly different:
6845 #pragma omp simd reduction(inscan,+:r)
6849 #pragma omp scan exclusive (r)
6852 shall have body with:
6853 // Initialization for input phase, store the reduction initializer:
6854 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6855 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6857 // Actual input phase:
6859 r.0_5 = D.2042[_20];
6862 // Initialization for scan phase:
6863 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6869 // Actual scan phase:
6871 r.1_8 = D.2044[_20];
6874 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6876 /* Match the D.2042[_21] = 0; store above. Just require that
6877 it is a constant or external definition store. */
6878 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6881 if (dump_enabled_p ())
6882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6883 "unsupported OpenMP scan initializer store.\n");
6887 if (! loop_vinfo
->scan_map
)
6888 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6889 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6890 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6893 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6895 /* These stores can be vectorized normally. */
6899 if (rhs_dt
!= vect_internal_def
)
6902 if (dump_enabled_p ())
6903 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6904 "unsupported OpenMP scan combiner pattern.\n");
6908 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6909 tree rhs
= gimple_assign_rhs1 (stmt
);
6910 if (TREE_CODE (rhs
) != SSA_NAME
)
6913 gimple
*other_store_stmt
= NULL
;
6914 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6915 bool inscan_var_store
6916 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6918 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6920 if (!inscan_var_store
)
6922 use_operand_p use_p
;
6923 imm_use_iterator iter
;
6924 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6926 gimple
*use_stmt
= USE_STMT (use_p
);
6927 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6929 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6930 || !is_gimple_assign (use_stmt
)
6931 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6933 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6935 other_store_stmt
= use_stmt
;
6937 if (other_store_stmt
== NULL
)
6939 rhs
= gimple_assign_lhs (other_store_stmt
);
6940 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6944 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6946 use_operand_p use_p
;
6947 imm_use_iterator iter
;
6948 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6950 gimple
*use_stmt
= USE_STMT (use_p
);
6951 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6953 if (other_store_stmt
)
6955 other_store_stmt
= use_stmt
;
6961 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6962 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6963 || !is_gimple_assign (def_stmt
)
6964 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6967 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
6972 case POINTER_PLUS_EXPR
:
6975 case MULT_HIGHPART_EXPR
:
6980 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6983 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6984 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6985 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6988 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6989 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6990 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6991 || !gimple_assign_load_p (load1_stmt
)
6992 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6993 || !gimple_assign_load_p (load2_stmt
))
6996 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6997 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6998 if (load1_stmt_info
== NULL
6999 || load2_stmt_info
== NULL
7000 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
7001 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
7002 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
7003 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7006 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
7008 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7009 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
7010 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
7012 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7014 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7018 use_operand_p use_p
;
7019 imm_use_iterator iter
;
7020 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
7022 gimple
*use_stmt
= USE_STMT (use_p
);
7023 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
7025 if (other_store_stmt
)
7027 other_store_stmt
= use_stmt
;
7031 if (other_store_stmt
== NULL
)
7033 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
7034 || !gimple_store_p (other_store_stmt
))
7037 stmt_vec_info other_store_stmt_info
7038 = loop_vinfo
->lookup_stmt (other_store_stmt
);
7039 if (other_store_stmt_info
== NULL
7040 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
7041 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
7044 gimple
*stmt1
= stmt
;
7045 gimple
*stmt2
= other_store_stmt
;
7046 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7047 std::swap (stmt1
, stmt2
);
7048 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7049 gimple_assign_rhs1 (load2_stmt
)))
7051 std::swap (rhs1
, rhs2
);
7052 std::swap (load1_stmt
, load2_stmt
);
7053 std::swap (load1_stmt_info
, load2_stmt_info
);
7055 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
7056 gimple_assign_rhs1 (load1_stmt
)))
7059 tree var3
= NULL_TREE
;
7060 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
7061 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
7062 gimple_assign_rhs1 (load2_stmt
)))
7064 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7066 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7067 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
7068 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
7070 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7071 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
7072 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
7073 || lookup_attribute ("omp simd inscan exclusive",
7074 DECL_ATTRIBUTES (var3
)))
7078 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
7079 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
7080 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
7083 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7084 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
7085 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
7086 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
7087 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7088 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
7091 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7092 std::swap (var1
, var2
);
7094 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7096 if (!lookup_attribute ("omp simd inscan exclusive",
7097 DECL_ATTRIBUTES (var1
)))
7102 if (loop_vinfo
->scan_map
== NULL
)
7104 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7108 /* The IL is as expected, now check if we can actually vectorize it.
7115 should be vectorized as (where _40 is the vectorized rhs
7116 from the D.2042[_21] = 0; store):
7117 _30 = MEM <vector(8) int> [(int *)&D.2043];
7118 _31 = MEM <vector(8) int> [(int *)&D.2042];
7119 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7121 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7122 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7124 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7125 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7126 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7128 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7129 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7131 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7132 MEM <vector(8) int> [(int *)&D.2043] = _39;
7133 MEM <vector(8) int> [(int *)&D.2042] = _38;
7140 should be vectorized as (where _40 is the vectorized rhs
7141 from the D.2042[_21] = 0; store):
7142 _30 = MEM <vector(8) int> [(int *)&D.2043];
7143 _31 = MEM <vector(8) int> [(int *)&D.2042];
7144 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7145 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7147 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7148 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7149 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7151 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7152 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7153 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7155 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7156 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7159 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7160 MEM <vector(8) int> [(int *)&D.2044] = _39;
7161 MEM <vector(8) int> [(int *)&D.2042] = _51; */
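
/* Illustrative sketch, not part of the vectorizer itself: the exclusive
   variant shown above differs from the inclusive one only in that the
   input is shifted up by one extra lane before the log2 steps, so lane j
   ends up with the combination of lanes 0..j-1, seeded with the reduction
   initializer.  In scalar terms, with OP the reduction operation:

     excl[0] = init;
     for (int j = 1; j < N; ++j)
       excl[j] = excl[j - 1] OP v[j - 1];

   The inclusive prefix needed to broadcast the whole-group result is then
   recovered as excl[j] OP v[j], which is the last_perm_arg computation in
   vectorizable_scan_store.  */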
7162 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7163 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7164 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7167 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7168 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
7185 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7186 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7187 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7188 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_NOTE
, vect_location
,
7192 "transform scan store. ncopies = %d\n", ncopies
);
7194 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7195 tree rhs
= gimple_assign_rhs1 (stmt
);
7196 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7198 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7199 bool inscan_var_store
7200 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7204 use_operand_p use_p
;
7205 imm_use_iterator iter
;
7206 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7208 gimple
*use_stmt
= USE_STMT (use_p
);
7209 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7211 rhs
= gimple_assign_lhs (use_stmt
);
7216 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7217 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7218 if (code
== POINTER_PLUS_EXPR
)
7220 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7221 && commutative_tree_code (code
));
7222 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7223 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7224 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7225 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7226 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7227 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7228 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7229 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7230 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7231 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7232 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7234 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7236 std::swap (rhs1
, rhs2
);
7237 std::swap (var1
, var2
);
7238 std::swap (load1_dr_info
, load2_dr_info
);
7241 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7244 unsigned HOST_WIDE_INT nunits
;
7245 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7247 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7248 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7249 gcc_assert (units_log2
> 0);
7250 auto_vec
<tree
, 16> perms
;
7251 perms
.quick_grow (units_log2
+ 1);
7252 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7253 for (int i
= 0; i
<= units_log2
; ++i
)
7255 unsigned HOST_WIDE_INT j
, k
;
7256 vec_perm_builder
sel (nunits
, nunits
, 1);
7257 sel
.quick_grow (nunits
);
7258 if (i
== units_log2
)
7259 for (j
= 0; j
< nunits
; ++j
)
7260 sel
[j
] = nunits
- 1;
7263 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7265 for (k
= 0; j
< nunits
; ++j
, ++k
)
7266 sel
[j
] = nunits
+ k
;
7268 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7269 if (!use_whole_vector
.is_empty ()
7270 && use_whole_vector
[i
] != scan_store_kind_perm
)
7272 if (zero_vec
== NULL_TREE
)
7273 zero_vec
= build_zero_cst (vectype
);
7274 if (masktype
== NULL_TREE
7275 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7276 masktype
= truth_type_for (vectype
);
7277 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7280 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7283 tree vec_oprnd1
= NULL_TREE
;
7284 tree vec_oprnd2
= NULL_TREE
;
7285 tree vec_oprnd3
= NULL_TREE
;
7286 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7287 tree dataref_offset
= build_int_cst (ref_type
, 0);
7288 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7289 vectype
, VMAT_CONTIGUOUS
);
7290 tree ldataref_ptr
= NULL_TREE
;
7291 tree orig
= NULL_TREE
;
7292 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7293 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7294 auto_vec
<tree
> vec_oprnds1
;
7295 auto_vec
<tree
> vec_oprnds2
;
7296 auto_vec
<tree
> vec_oprnds3
;
7297 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7298 *init
, &vec_oprnds1
,
7299 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7300 rhs2
, &vec_oprnds3
);
7301 for (int j
= 0; j
< ncopies
; j
++)
7303 vec_oprnd1
= vec_oprnds1
[j
];
7304 if (ldataref_ptr
== NULL
)
7305 vec_oprnd2
= vec_oprnds2
[j
];
7306 vec_oprnd3
= vec_oprnds3
[j
];
7309 else if (!inscan_var_store
)
7310 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7314 vec_oprnd2
= make_ssa_name (vectype
);
7315 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7316 unshare_expr (ldataref_ptr
),
7318 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7319 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7320 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7321 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7322 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7325 tree v
= vec_oprnd2
;
7326 for (int i
= 0; i
< units_log2
; ++i
)
7328 tree new_temp
= make_ssa_name (vectype
);
7329 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7331 && (use_whole_vector
[i
]
7332 != scan_store_kind_perm
))
7333 ? zero_vec
: vec_oprnd1
, v
,
7335 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7336 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7337 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7339 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7341 /* Whole vector shift shifted in zero bits, but if *init
7342 is not initializer_zerop, we need to replace those elements
7343 with elements from vec_oprnd1. */
7344 tree_vector_builder
vb (masktype
, nunits
, 1);
7345 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7346 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7347 ? boolean_false_node
: boolean_true_node
);
7349 tree new_temp2
= make_ssa_name (vectype
);
7350 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7351 new_temp
, vec_oprnd1
);
7352 vect_finish_stmt_generation (vinfo
, stmt_info
,
7354 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7355 new_temp
= new_temp2
;
7358 /* For exclusive scan, perform the perms[i] permutation once
7361 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7369 tree new_temp2
= make_ssa_name (vectype
);
7370 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7371 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7372 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7377 tree new_temp
= make_ssa_name (vectype
);
7378 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7379 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7380 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7382 tree last_perm_arg
= new_temp
;
7383 /* For exclusive scan, new_temp computed above is the exclusive scan
7384 prefix sum. Turn it into inclusive prefix sum for the broadcast
7385 of the last element into orig. */
7386 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7388 last_perm_arg
= make_ssa_name (vectype
);
7389 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7390 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7391 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7394 orig
= make_ssa_name (vectype
);
7395 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7396 last_perm_arg
, perms
[units_log2
]);
7397 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7398 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7400 if (!inscan_var_store
)
7402 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7403 unshare_expr (dataref_ptr
),
7405 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7406 g
= gimple_build_assign (data_ref
, new_temp
);
7407 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7408 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7412 if (inscan_var_store
)
7413 for (int j
= 0; j
< ncopies
; j
++)
7416 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7418 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7419 unshare_expr (dataref_ptr
),
7421 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7422 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7423 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7424 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
7446 tree vec_oprnd
= NULL_TREE
;
7448 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7449 class loop
*loop
= NULL
;
7450 machine_mode vec_mode
;
7452 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7453 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7454 tree dataref_ptr
= NULL_TREE
;
7455 tree dataref_offset
= NULL_TREE
;
7456 gimple
*ptr_incr
= NULL
;
7459 stmt_vec_info first_stmt_info
;
7461 unsigned int group_size
, i
;
7462 vec
<tree
> oprnds
= vNULL
;
7463 vec
<tree
> result_chain
= vNULL
;
7464 vec
<tree
> vec_oprnds
= vNULL
;
7465 bool slp
= (slp_node
!= NULL
);
7466 unsigned int vec_num
;
7467 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7469 gather_scatter_info gs_info
;
7471 vec_load_store_type vls_type
;
7474 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7477 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7481 /* Is vectorizable store? */
7483 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7484 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7486 tree scalar_dest
= gimple_assign_lhs (assign
);
7487 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7488 && is_pattern_stmt_p (stmt_info
))
7489 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7490 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7491 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7492 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7493 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7494 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7495 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7496 && TREE_CODE (scalar_dest
) != MEM_REF
)
7501 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7502 if (!call
|| !gimple_call_internal_p (call
))
7505 internal_fn ifn
= gimple_call_internal_fn (call
);
7506 if (!internal_store_fn_p (ifn
))
7509 if (slp_node
!= NULL
)
7511 if (dump_enabled_p ())
7512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7513 "SLP of masked stores not supported.\n");
7517 int mask_index
= internal_fn_mask_index (ifn
);
7519 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7520 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7524 op
= vect_get_store_rhs (stmt_info
);
7526 /* Cannot have hybrid store SLP -- that would mean storing to the
7527 same location twice. */
7528 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7530 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7531 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7535 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7536 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
7547 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7549 gcc_assert (ncopies
>= 1);
7551 /* FORNOW. This restriction should be relaxed. */
7552 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7556 "multiple types in nested loop.\n");
7560 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7561 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7564 elem_type
= TREE_TYPE (vectype
);
7565 vec_mode
= TYPE_MODE (vectype
);
7567 if (!STMT_VINFO_DATA_REF (stmt_info
))
7570 vect_memory_access_type memory_access_type
;
7571 enum dr_alignment_support alignment_support_scheme
;
7574 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7575 ncopies
, &memory_access_type
, &poffset
,
7576 &alignment_support_scheme
, &misalignment
, &gs_info
))
7581 if (memory_access_type
== VMAT_CONTIGUOUS
)
7583 if (!VECTOR_MODE_P (vec_mode
)
7584 || !can_vec_mask_load_store_p (vec_mode
,
7585 TYPE_MODE (mask_vectype
), false))
7588 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7589 && (memory_access_type
!= VMAT_GATHER_SCATTER
7590 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7594 "unsupported access type for masked store.\n");
7600 /* FORNOW. In some cases can vectorize even if data-type not supported
7601 (e.g. - array initialization with 0). */
7602 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7606 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7607 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7608 && memory_access_type
!= VMAT_GATHER_SCATTER
7609 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7612 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7613 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7614 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7618 first_stmt_info
= stmt_info
;
7619 first_dr_info
= dr_info
;
7620 group_size
= vec_num
= 1;
7623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7625 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7626 memory_access_type
))
7630 if (!vec_stmt
) /* transformation not required. */
7632 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7635 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7636 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
7637 vls_type
, group_size
,
7638 memory_access_type
, &gs_info
,
7642 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7647 "incompatible vector types for invariants\n");
7651 if (dump_enabled_p ()
7652 && memory_access_type
!= VMAT_ELEMENTWISE
7653 && memory_access_type
!= VMAT_GATHER_SCATTER
7654 && alignment_support_scheme
!= dr_aligned
)
7655 dump_printf_loc (MSG_NOTE
, vect_location
,
7656 "Vectorizing an unaligned access.\n");
7658 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7659 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7660 memory_access_type
, alignment_support_scheme
,
7661 misalignment
, vls_type
, slp_node
, cost_vec
);
7664 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7668 ensure_base_align (dr_info
);
7670 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7672 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7673 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7674 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7675 tree ptr
, var
, scale
, vec_mask
;
7676 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7677 tree mask_halfvectype
= mask_vectype
;
7678 edge pe
= loop_preheader_edge (loop
);
7681 enum { NARROW
, NONE
, WIDEN
} modifier
;
7682 poly_uint64 scatter_off_nunits
7683 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7685 if (known_eq (nunits
, scatter_off_nunits
))
7687 else if (known_eq (nunits
* 2, scatter_off_nunits
))
	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
7693 unsigned int count
= scatter_off_nunits
.to_constant ();
7694 vec_perm_builder
sel (count
, count
, 1);
7695 for (i
= 0; i
< (unsigned int) count
; ++i
)
7696 sel
.quick_push (i
| (count
/ 2));
7698 vec_perm_indices
indices (sel
, 1, count
);
7699 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7701 gcc_assert (perm_mask
!= NULL_TREE
);
7703 else if (known_eq (nunits
, scatter_off_nunits
* 2))
	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
7709 unsigned int count
= nunits
.to_constant ();
7710 vec_perm_builder
sel (count
, count
, 1);
7711 for (i
= 0; i
< (unsigned int) count
; ++i
)
7712 sel
.quick_push (i
| (count
/ 2));
7714 vec_perm_indices
indices (sel
, 2, count
);
7715 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7716 gcc_assert (perm_mask
!= NULL_TREE
);
7720 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7725 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7726 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7727 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7728 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7729 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7730 scaletype
= TREE_VALUE (arglist
);
7732 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7733 && TREE_CODE (rettype
) == VOID_TYPE
);
7735 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7736 if (!is_gimple_min_invariant (ptr
))
7738 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7739 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7740 gcc_assert (!new_bb
);
7743 if (mask
== NULL_TREE
)
7745 mask_arg
= build_int_cst (masktype
, -1);
7746 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7747 mask_arg
, masktype
, NULL
);
7750 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7752 auto_vec
<tree
> vec_oprnds0
;
7753 auto_vec
<tree
> vec_oprnds1
;
7754 auto_vec
<tree
> vec_masks
;
7757 tree mask_vectype
= truth_type_for (vectype
);
7758 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7760 ? ncopies
/ 2 : ncopies
,
7761 mask
, &vec_masks
, mask_vectype
);
7763 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7765 ? ncopies
/ 2 : ncopies
,
7766 gs_info
.offset
, &vec_oprnds0
);
7767 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7769 ? ncopies
/ 2 : ncopies
,
7771 for (j
= 0; j
< ncopies
; ++j
)
7773 if (modifier
== WIDEN
)
7776 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7777 perm_mask
, stmt_info
, gsi
);
7779 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7780 src
= vec_oprnd1
= vec_oprnds1
[j
];
7782 mask_op
= vec_mask
= vec_masks
[j
];
7784 else if (modifier
== NARROW
)
7787 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7788 perm_mask
, stmt_info
, gsi
);
7790 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7791 op
= vec_oprnd0
= vec_oprnds0
[j
];
7793 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7797 op
= vec_oprnd0
= vec_oprnds0
[j
];
7798 src
= vec_oprnd1
= vec_oprnds1
[j
];
7800 mask_op
= vec_mask
= vec_masks
[j
];
7803 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7806 TYPE_VECTOR_SUBPARTS (srctype
)));
7807 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7808 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7810 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7815 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7817 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7818 TYPE_VECTOR_SUBPARTS (idxtype
)));
7819 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7820 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7822 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7823 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7831 if (modifier
== NARROW
)
7833 var
= vect_get_new_ssa_name (mask_halfvectype
,
7836 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7837 : VEC_UNPACK_LO_EXPR
,
7839 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7842 tree optype
= TREE_TYPE (mask_arg
);
7843 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7846 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7847 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7848 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7850 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7851 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7853 if (!useless_type_conversion_p (masktype
, utype
))
7855 gcc_assert (TYPE_PRECISION (utype
)
7856 <= TYPE_PRECISION (masktype
));
7857 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7858 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7859 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7865 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7866 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7868 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7870 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7873 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7874 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7876 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7877 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7882 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
7886 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7887 < DR_GROUP_SIZE (first_stmt_info
)
7896 grouped_store
= false;
      /* VEC_NUM is the number of vect stmts to be created for this
	 group.  */
7899 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7900 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7901 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7902 == first_stmt_info
);
7903 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7904 op
= vect_get_store_rhs (first_stmt_info
);
    /* VEC_NUM is the number of vect stmts to be created for this
       group.  */
7909 vec_num
= group_size
;
7911 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7914 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7916 if (dump_enabled_p ())
7917 dump_printf_loc (MSG_NOTE
, vect_location
,
7918 "transform store. ncopies = %d\n", ncopies
);
7920 if (memory_access_type
== VMAT_ELEMENTWISE
7921 || memory_access_type
== VMAT_STRIDED_SLP
)
7923 gimple_stmt_iterator incr_gsi
;
7929 tree stride_base
, stride_step
, alias_off
;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
7936 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7937 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7939 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7941 = fold_build_pointer_plus
7942 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7943 size_binop (PLUS_EXPR
,
7944 convert_to_ptrofftype (dr_offset
),
7945 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7946 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
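      /* A concrete, purely illustrative instance of the above (the vector
	 type, VF and stride are assumptions for the example, not something
	 this function requires): with a V4SI vectype, VF = 4 and
	 stride = 3, the scalar loop

	   for (i = 0; i < n; i += 3)
	     array[i] = f (i);

	 conceptually becomes

	   for (j = 0; ; j += 4*3)
	     {
	       vectemp = { f(j), f(j+3), f(j+6), f(j+9) };
	       array[j]   = vectemp[0];
	       array[j+3] = vectemp[1];
	       array[j+6] = vectemp[2];
	       array[j+9] = vectemp[3];
	     }  */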
      unsigned nstores = const_nunits;
      unsigned lnel = 1;
      tree ltype = elem_type;
      tree lvectype = vectype;
      if (slp)
	{
	  if (group_size < const_nunits
	      && const_nunits % group_size == 0)
	    {
	      nstores = const_nunits / group_size;
	      lnel = group_size;
	      ltype = build_vector_type (elem_type, group_size);
	      lvectype = vectype;

	      /* First check if vec_extract optab doesn't support extraction
		 of vector elts directly.  */
	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
	      machine_mode vmode;
	      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
		  || !related_vector_mode (TYPE_MODE (vectype), elmode,
					   group_size).exists (&vmode)
		  || (convert_optab_handler (vec_extract_optab,
					     TYPE_MODE (vectype), vmode)
		      == CODE_FOR_nothing))
		{
		  /* Try to avoid emitting an extract of vector elements
		     by performing the extracts using an integer type of the
		     same size, extracting from a vector of those and then
		     re-interpreting it as the original vector type if
		     supported.  */
		  unsigned lsize
		    = group_size * GET_MODE_BITSIZE (elmode);
		  unsigned int lnunits = const_nunits / group_size;
		  /* If we can't construct such a vector fall back to
		     element extracts from the original vector type and
		     element size stores.  */
		  if (int_mode_for_size (lsize, 0).exists (&elmode)
		      && VECTOR_MODE_P (TYPE_MODE (vectype))
		      && related_vector_mode (TYPE_MODE (vectype), elmode,
					      lnunits).exists (&vmode)
		      && (convert_optab_handler (vec_extract_optab,
						 vmode, elmode)
			  != CODE_FOR_nothing))
		    {
		      nstores = lnunits;
		      lnel = group_size;
		      ltype = build_nonstandard_integer_type (lsize, 1);
		      lvectype = build_vector_type (ltype, nstores);
		    }
		  /* Else fall back to vector extraction anyway.
		     Fewer stores are more important than avoiding spilling
		     of the vector we extract from.  Compared to the
		     construction case in vectorizable_load no store-forwarding
		     issue exists here for reasonable archs.  */
		}
	    }
	  else if (group_size >= const_nunits
		   && group_size % const_nunits == 0)
	    {
	      nstores = 1;
	      lnel = const_nunits;
	      ltype = vectype;
	      lvectype = vectype;
	    }
	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	}
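      /* Illustrative sketch of the integer-type fallback chosen above (the
	 concrete modes are assumptions for the example, not requirements):
	 for a group of 2 SFmode elements stored from a V8SF vector, lsize
	 is 64, so on a target that has DImode, V4DImode and a vec_extract
	 from V4DI to DI we view the V8SF value as V4DI and emit 4 DImode
	 stores instead of 8 SFmode ones.  */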
8034 ivstep
= stride_step
;
8035 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8036 build_int_cst (TREE_TYPE (ivstep
), vf
));
8038 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8040 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8041 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8042 create_iv (stride_base
, ivstep
, NULL
,
8043 loop
, &incr_gsi
, insert_after
,
8045 incr
= gsi_stmt (incr_gsi
);
8047 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8049 alias_off
= build_int_cst (ref_type
, 0);
8050 stmt_vec_info next_stmt_info
= first_stmt_info
;
8051 for (g
= 0; g
< group_size
; g
++)
8053 running_off
= offvar
;
8056 tree size
= TYPE_SIZE_UNIT (ltype
);
8057 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
8059 tree newoff
= copy_ssa_name (running_off
, NULL
);
8060 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8062 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8063 running_off
= newoff
;
8066 op
= vect_get_store_rhs (next_stmt_info
);
8067 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
8069 unsigned int group_el
= 0;
8070 unsigned HOST_WIDE_INT
8071 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8072 for (j
= 0; j
< ncopies
; j
++)
8074 vec_oprnd
= vec_oprnds
[j
];
8075 /* Pun the vector to extract from if necessary. */
8076 if (lvectype
!= vectype
)
8078 tree tem
= make_ssa_name (lvectype
);
8080 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
8081 lvectype
, vec_oprnd
));
8082 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8085 for (i
= 0; i
< nstores
; i
++)
8087 tree newref
, newoff
;
8088 gimple
*incr
, *assign
;
8089 tree size
= TYPE_SIZE (ltype
);
8090 /* Extract the i'th component. */
8091 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8092 bitsize_int (i
), size
);
8093 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8096 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8100 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8102 newref
= build2 (MEM_REF
, ltype
,
8103 running_off
, this_off
);
8104 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8106 /* And store it to *running_off. */
8107 assign
= gimple_build_assign (newref
, elem
);
8108 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8112 || group_el
== group_size
)
8114 newoff
= copy_ssa_name (running_off
, NULL
);
8115 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8116 running_off
, stride_step
);
8117 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8119 running_off
= newoff
;
8122 if (g
== group_size
- 1
8125 if (j
== 0 && i
== 0)
8127 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8131 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8132 vec_oprnds
.release ();
  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  vec_loop_lens *loop_lens
    = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
       ? &LOOP_VINFO_LENS (loop_vinfo)
       : NULL);
  /* Shouldn't go with length-based approach if fully masked.  */
  gcc_assert (!loop_lens || !loop_masks);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
8165 tree offset
= NULL_TREE
;
8166 if (!known_eq (poffset
, 0))
8167 offset
= size_int (poffset
);
8170 tree vec_offset
= NULL_TREE
;
8171 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8173 aggr_type
= NULL_TREE
;
8176 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8178 aggr_type
= elem_type
;
8179 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8180 &bump
, &vec_offset
);
8184 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8185 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8187 aggr_type
= vectype
;
8188 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8189 memory_access_type
);
8193 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
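  /* For example (illustrative numbers only): with VF = 8 and a V4SI
     vectype (nunits = 4), each scalar store gives rise to
     ncopies = VF/nunits = 2 vector stores per vectorized iteration.  */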
  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
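  /* A small worked instance of the above (the vector type is an assumption
     for illustration): for an interleaving chain of two stores
     a[2*i] = x and a[2*i+1] = y with V4SI vectors vx = {x0,x1,x2,x3} and
     vy = {y0,y1,y2,y3}, vect_permute_store_chain produces

       vlo = VEC_PERM_EXPR <vx, vy, {0, 4, 1, 5}>   i.e. {x0,y0,x1,y1}
       vhi = VEC_PERM_EXPR <vx, vy, {2, 6, 3, 7}>   i.e. {x2,y2,x3,y3}

     and the two contiguous stores of vlo and vhi then cover
     a[2*i .. 2*i+7].  */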
  auto_vec<tree> vec_masks;
  tree vec_mask = NULL;
  auto_vec<tree> vec_offsets;
  auto_vec<vec<tree> > gvec_oprnds;
  gvec_oprnds.safe_grow_cleared (group_size, true);
  for (j = 0; j < ncopies; j++)
8245 /* Get vectorized arguments for SLP_NODE. */
8246 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8248 vec_oprnd
= vec_oprnds
[0];
	  /* For interleaved stores we collect vectorized defs for all the
	     stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
	     used as an input to vect_permute_store_chain().

	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
	     and OPRNDS are of size 1.  */
8258 stmt_vec_info next_stmt_info
= first_stmt_info
;
8259 for (i
= 0; i
< group_size
; i
++)
	      /* Since gaps are not supported for interleaved stores,
		 DR_GROUP_SIZE is the exact number of stmts in the chain.
		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
		 that there is no interleaving, DR_GROUP_SIZE is 1,
		 and only one iteration of the loop will be executed.  */
8266 op
= vect_get_store_rhs (next_stmt_info
);
8267 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8268 ncopies
, op
, &gvec_oprnds
[i
]);
8269 vec_oprnd
= gvec_oprnds
[i
][0];
8270 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8271 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8272 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8276 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8277 mask
, &vec_masks
, mask_vectype
);
8278 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
8285 bool simd_lane_access_p
8286 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8287 if (simd_lane_access_p
8289 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8290 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8291 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8292 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8293 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8294 get_alias_set (TREE_TYPE (ref_type
))))
8296 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8297 dataref_offset
= build_int_cst (ref_type
, 0);
8299 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8301 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8302 slp_node
, &gs_info
, &dataref_ptr
,
8304 vec_offset
= vec_offsets
[0];
8308 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8309 simd_lane_access_p
? loop
: NULL
,
8310 offset
, &dummy
, gsi
, &ptr_incr
,
8311 simd_lane_access_p
, bump
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain().
	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
8320 for (i
= 0; i
< group_size
; i
++)
8322 vec_oprnd
= gvec_oprnds
[i
][j
];
8323 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8324 oprnds
[i
] = gvec_oprnds
[i
][j
];
8327 vec_mask
= vec_masks
[j
];
8330 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8331 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8332 vec_offset
= vec_offsets
[j
];
8334 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8338 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8342 /* Get an array into which we can store the individual vectors. */
8343 vec_array
= create_vector_array (vectype
, vec_num
);
	  /* Invalidate the current contents of VEC_ARRAY.  This should
	     become an RTL clobber too, which prevents the vector registers
	     from being upward-exposed.  */
8348 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8350 /* Store the individual vectors into the array. */
8351 for (i
= 0; i
< vec_num
; i
++)
8353 vec_oprnd
= dr_chain
[i
];
8354 write_vector_array (vinfo
, stmt_info
,
8355 gsi
, vec_oprnd
, vec_array
, i
);
8358 tree final_mask
= NULL
;
8360 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8363 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8364 final_mask
, vec_mask
, gsi
);
8370 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8372 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8373 tree alias_ptr
= build_int_cst (ref_type
, align
);
8374 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8375 dataref_ptr
, alias_ptr
,
8376 final_mask
, vec_array
);
8381 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8382 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8383 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8385 gimple_call_set_lhs (call
, data_ref
);
8387 gimple_call_set_nothrow (call
, true);
8388 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8391 /* Record that VEC_ARRAY is now dead. */
8392 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8400 result_chain
.create (group_size
);
8402 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8403 gsi
, &result_chain
);
8406 stmt_vec_info next_stmt_info
= first_stmt_info
;
8407 for (i
= 0; i
< vec_num
; i
++)
8410 unsigned HOST_WIDE_INT align
;
8412 tree final_mask
= NULL_TREE
;
8414 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8416 vectype
, vec_num
* j
+ i
);
8418 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8419 final_mask
, vec_mask
, gsi
);
8421 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8423 tree scale
= size_int (gs_info
.scale
);
8426 call
= gimple_build_call_internal
8427 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8428 scale
, vec_oprnd
, final_mask
);
8430 call
= gimple_build_call_internal
8431 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8433 gimple_call_set_nothrow (call
, true);
8434 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8440 /* Bump the vector pointer. */
8441 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8442 gsi
, stmt_info
, bump
);
8445 vec_oprnd
= vec_oprnds
[i
];
8446 else if (grouped_store
)
	    /* For grouped stores vectorized defs are interleaved in
	       vect_permute_store_chain().  */
8449 vec_oprnd
= result_chain
[i
];
8451 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8452 if (alignment_support_scheme
== dr_aligned
)
8454 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8456 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8460 misalign
= misalignment
;
8461 if (dataref_offset
== NULL_TREE
8462 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8463 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8465 align
= least_bit_hwi (misalign
| align
);
8467 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8469 tree perm_mask
= perm_mask_for_reverse (vectype
);
8470 tree perm_dest
= vect_create_destination_var
8471 (vect_get_store_rhs (stmt_info
), vectype
);
8472 tree new_temp
= make_ssa_name (perm_dest
);
8474 /* Generate the permute statement. */
8476 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8477 vec_oprnd
, perm_mask
);
8478 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8480 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8481 vec_oprnd
= new_temp
;
8484 /* Arguments are ready. Create the new vector stmt. */
8487 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8489 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8491 final_mask
, vec_oprnd
);
8492 gimple_call_set_nothrow (call
, true);
8493 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8499 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8500 vec_num
* ncopies
, vec_num
* j
+ i
);
8501 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8502 machine_mode vmode
= TYPE_MODE (vectype
);
8503 opt_machine_mode new_ovmode
8504 = get_len_load_store_mode (vmode
, false);
8505 machine_mode new_vmode
= new_ovmode
.require ();
8506 /* Need conversion if it's wrapped with VnQI. */
8507 if (vmode
!= new_vmode
)
8510 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8513 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8515 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8517 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8519 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8524 signed char biasval
=
8525 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8527 tree bias
= build_int_cst (intQI_type_node
, biasval
);
8529 = gimple_build_call_internal (IFN_LEN_STORE
, 5, dataref_ptr
,
8530 ptr
, final_len
, vec_oprnd
,
8532 gimple_call_set_nothrow (call
, true);
8533 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8538 data_ref
= fold_build2 (MEM_REF
, vectype
,
8542 : build_int_cst (ref_type
, 0));
8543 if (alignment_support_scheme
== dr_aligned
)
8546 TREE_TYPE (data_ref
)
8547 = build_aligned_type (TREE_TYPE (data_ref
),
8548 align
* BITS_PER_UNIT
);
8549 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8550 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8551 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8557 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8558 if (!next_stmt_info
)
8565 *vec_stmt
= new_stmt
;
8566 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8570 for (i
= 0; i
< group_size
; ++i
)
8572 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8576 result_chain
.release ();
8577 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  machine_mode vmode = TYPE_MODE (vectype);
  gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
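/* Example use of the two helpers above (a sketch, not from a particular
   caller): to build a mask that reverses a V4SI vector one could write

     vec_perm_builder sel (4, 4, 1);
     for (unsigned i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   which yields the VECTOR_CST { 3, 2, 1, 0 }, usable as the last operand
   of a VEC_PERM_EXPR.  */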
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
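/* Illustrative sketch of the effect (the statement names are made up for
   the example): given an invariant load inside the loop being vectorized

     loop:
       a_1 = b_2 + 1;
       x_3 = *(base_p + a_1);   <-- STMT_INFO, loop-invariant address

   hoist_defs_of_uses moves the definition a_1 = b_2 + 1 to the preheader
   (provided b_2 is itself defined outside the loop), after which the
   caller is free to emit the load itself on the preheader edge.  */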
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
8708 tree vec_dest
= NULL
;
8709 tree data_ref
= NULL
;
8710 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8711 class loop
*loop
= NULL
;
8712 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8713 bool nested_in_vect_loop
= false;
8718 tree dataref_ptr
= NULL_TREE
;
8719 tree dataref_offset
= NULL_TREE
;
8720 gimple
*ptr_incr
= NULL
;
8723 unsigned int group_size
;
8724 poly_uint64 group_gap_adj
;
8725 tree msq
= NULL_TREE
, lsq
;
8726 tree realignment_token
= NULL_TREE
;
8728 vec
<tree
> dr_chain
= vNULL
;
8729 bool grouped_load
= false;
8730 stmt_vec_info first_stmt_info
;
8731 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8732 bool compute_in_loop
= false;
8733 class loop
*at_loop
;
8735 bool slp
= (slp_node
!= NULL
);
8736 bool slp_perm
= false;
8737 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8740 gather_scatter_info gs_info
;
8742 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8744 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8747 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8751 if (!STMT_VINFO_DATA_REF (stmt_info
))
8754 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8755 int mask_index
= -1;
8756 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8758 scalar_dest
= gimple_assign_lhs (assign
);
8759 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8762 tree_code code
= gimple_assign_rhs_code (assign
);
8763 if (code
!= ARRAY_REF
8764 && code
!= BIT_FIELD_REF
8765 && code
!= INDIRECT_REF
8766 && code
!= COMPONENT_REF
8767 && code
!= IMAGPART_EXPR
8768 && code
!= REALPART_EXPR
8770 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8775 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8776 if (!call
|| !gimple_call_internal_p (call
))
8779 internal_fn ifn
= gimple_call_internal_fn (call
);
8780 if (!internal_load_fn_p (ifn
))
8783 scalar_dest
= gimple_call_lhs (call
);
      mask_index = internal_fn_mask_index (ifn);
      /* ???  For SLP the mask operand is always last.  */
      if (mask_index >= 0 && slp_node)
	mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8792 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8793 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8797 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8798 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8802 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8803 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8804 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }
  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");
      return false;
    }
  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
      return false;
    }
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      /* Refuse non-SLP vectorization of SLP-only groups.  */
      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot vectorize load in non-SLP mode.\n");
	  return false;
	}
8875 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8881 /* In BB vectorization we may not actually use a loaded vector
8882 accessing elements in excess of DR_GROUP_SIZE. */
8883 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8884 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8885 unsigned HOST_WIDE_INT nunits
;
8886 unsigned j
, k
, maxk
= 0;
8887 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8890 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8891 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8892 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8894 if (dump_enabled_p ())
8895 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8896 "BB vectorization with gaps at the end of "
8897 "a load is not supported\n");
8904 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8907 if (dump_enabled_p ())
8908 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8910 "unsupported load permutation\n");
8915 /* Invalidate assumptions made by dependence analysis when vectorization
8916 on the unrolled body effectively re-orders stmts. */
8917 if (!PURE_SLP_STMT (stmt_info
)
8918 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8919 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8920 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8922 if (dump_enabled_p ())
8923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8924 "cannot perform implicit CSE when performing "
8925 "group loads with negative dependence distance\n");
8932 vect_memory_access_type memory_access_type
;
8933 enum dr_alignment_support alignment_support_scheme
;
8936 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8937 ncopies
, &memory_access_type
, &poffset
,
8938 &alignment_support_scheme
, &misalignment
, &gs_info
))
8943 if (memory_access_type
== VMAT_CONTIGUOUS
)
8945 machine_mode vec_mode
= TYPE_MODE (vectype
);
8946 if (!VECTOR_MODE_P (vec_mode
)
8947 || !can_vec_mask_load_store_p (vec_mode
,
8948 TYPE_MODE (mask_vectype
), true))
8951 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8952 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8954 if (dump_enabled_p ())
8955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8956 "unsupported access type for masked load.\n");
8959 else if (memory_access_type
== VMAT_GATHER_SCATTER
8960 && gs_info
.ifn
== IFN_LAST
8963 if (dump_enabled_p ())
8964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8965 "unsupported masked emulated gather.\n");
8970 if (!vec_stmt
) /* transformation not required. */
8974 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8977 if (dump_enabled_p ())
8978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8979 "incompatible vector types for invariants\n");
8984 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8987 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8988 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8989 VLS_LOAD
, group_size
,
8990 memory_access_type
, &gs_info
,
8993 if (dump_enabled_p ()
8994 && memory_access_type
!= VMAT_ELEMENTWISE
8995 && memory_access_type
!= VMAT_GATHER_SCATTER
8996 && alignment_support_scheme
!= dr_aligned
)
8997 dump_printf_loc (MSG_NOTE
, vect_location
,
8998 "Vectorizing an unaligned access.\n");
9000 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9001 vinfo
->any_known_not_updated_vssa
= true;
9003 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
9004 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
9005 alignment_support_scheme
, misalignment
,
9006 &gs_info
, slp_node
, cost_vec
);
9011 gcc_assert (memory_access_type
9012 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
9014 if (dump_enabled_p ())
9015 dump_printf_loc (MSG_NOTE
, vect_location
,
9016 "transform load. ncopies = %d\n", ncopies
);
9020 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
9021 ensure_base_align (dr_info
);
9023 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
9025 vect_build_gather_load_calls (vinfo
,
9026 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
9030 if (memory_access_type
== VMAT_INVARIANT
)
9032 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
9033 /* If we have versioned for aliasing or the loop doesn't
9034 have any data dependencies that would preclude this,
9035 then we are sure this is a loop invariant load and
9036 thus we can insert it on the preheader edge. */
9037 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
9038 && !nested_in_vect_loop
9039 && hoist_defs_of_uses (stmt_info
, loop
));
9042 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
9043 if (dump_enabled_p ())
9044 dump_printf_loc (MSG_NOTE
, vect_location
,
9045 "hoisting out of the vectorized loop: %G",
9047 scalar_dest
= copy_ssa_name (scalar_dest
);
9048 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
9049 edge pe
= loop_preheader_edge (loop
);
9050 gphi
*vphi
= get_virtual_phi (loop
->header
);
9053 vuse
= PHI_ARG_DEF_FROM_EDGE (vphi
, pe
);
9055 vuse
= gimple_vuse (gsi_stmt (*gsi
));
9056 gimple
*new_stmt
= gimple_build_assign (scalar_dest
, rhs
);
9057 gimple_set_vuse (new_stmt
, vuse
);
9058 gsi_insert_on_edge_immediate (pe
, new_stmt
);
9060 /* These copies are all equivalent, but currently the representation
9061 requires a separate STMT_VINFO_VEC_STMT for each one. */
9062 gimple_stmt_iterator gsi2
= *gsi
;
9064 for (j
= 0; j
< ncopies
; j
++)
9067 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9070 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
9072 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9074 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9078 *vec_stmt
= new_stmt
;
9079 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9085 if (memory_access_type
== VMAT_ELEMENTWISE
9086 || memory_access_type
== VMAT_STRIDED_SLP
)
9088 gimple_stmt_iterator incr_gsi
;
9093 vec
<constructor_elt
, va_gc
> *v
= NULL
;
9094 tree stride_base
, stride_step
, alias_off
;
9095 /* Checked by get_load_store_type. */
9096 unsigned int const_nunits
= nunits
.to_constant ();
9097 unsigned HOST_WIDE_INT cst_offset
= 0;
9100 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
9101 gcc_assert (!nested_in_vect_loop
);
9105 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9106 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9110 first_stmt_info
= stmt_info
;
9111 first_dr_info
= dr_info
;
9113 if (slp
&& grouped_load
)
9115 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9116 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9122 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9123 * vect_get_place_in_interleaving_chain (stmt_info
,
9126 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9129 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9131 = fold_build_pointer_plus
9132 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9133 size_binop (PLUS_EXPR
,
9134 convert_to_ptrofftype (dr_offset
),
9135 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9136 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...  */
9154 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9155 build_int_cst (TREE_TYPE (stride_step
), vf
));
9157 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9159 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9160 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9161 create_iv (stride_base
, ivstep
, NULL
,
9162 loop
, &incr_gsi
, insert_after
,
9165 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9167 running_off
= offvar
;
9168 alias_off
= build_int_cst (ref_type
, 0);
9169 int nloads
= const_nunits
;
9171 tree ltype
= TREE_TYPE (vectype
);
9172 tree lvectype
= vectype
;
9173 auto_vec
<tree
> dr_chain
;
9174 if (memory_access_type
== VMAT_STRIDED_SLP
)
9176 if (group_size
< const_nunits
)
	      /* First check if vec_init optab supports construction from vector
		 elts directly.  Otherwise avoid emitting a constructor of
		 vector elements by performing the loads using an integer type
		 of the same size, constructing a vector of those and then
		 re-interpreting it as the original vector type.  This avoids a
		 huge runtime penalty due to the general inability to perform
		 store forwarding from smaller stores to a larger load.  */
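	      /* Sketch of the composition trick (the modes are assumptions
		 for illustration only): loading groups of 2 SFmode elements
		 into a V8SF vector can be done as 4 DImode loads combined
		 through a V4DI constructor that is then VIEW_CONVERTed to
		 V8SF, instead of 8 scalar SFmode loads feeding a V8SF
		 constructor, which would be prone to store-forwarding
		 stalls.  */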
9187 = vector_vector_composition_type (vectype
,
9188 const_nunits
/ group_size
,
9190 if (vtype
!= NULL_TREE
)
9192 nloads
= const_nunits
/ group_size
;
9201 lnel
= const_nunits
;
9204 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9206 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9207 else if (nloads
== 1)
9212 /* For SLP permutation support we need to load the whole group,
9213 not only the number of vector stmts the permutation result
9217 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9219 unsigned int const_vf
= vf
.to_constant ();
9220 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9221 dr_chain
.create (ncopies
);
9224 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9226 unsigned int group_el
= 0;
9227 unsigned HOST_WIDE_INT
9228 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9229 for (j
= 0; j
< ncopies
; j
++)
9232 vec_alloc (v
, nloads
);
9233 gimple
*new_stmt
= NULL
;
9234 for (i
= 0; i
< nloads
; i
++)
9236 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9237 group_el
* elsz
+ cst_offset
);
9238 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9239 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9240 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9241 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9243 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9244 gimple_assign_lhs (new_stmt
));
9248 || group_el
== group_size
)
9250 tree newoff
= copy_ssa_name (running_off
);
9251 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9252 running_off
, stride_step
);
9253 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9255 running_off
= newoff
;
9261 tree vec_inv
= build_constructor (lvectype
, v
);
9262 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9263 vec_inv
, lvectype
, gsi
);
9264 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9265 if (lvectype
!= vectype
)
9267 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9269 build1 (VIEW_CONVERT_EXPR
,
9270 vectype
, new_temp
));
9271 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9278 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9280 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9285 *vec_stmt
= new_stmt
;
9286 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9292 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9298 if (memory_access_type
== VMAT_GATHER_SCATTER
9299 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9300 grouped_load
= false;
9304 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9305 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9306 /* For SLP vectorization we directly vectorize a subchain
9307 without permutation. */
9308 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9309 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9310 /* For BB vectorization always use the first stmt to base
9311 the data ref pointer on. */
9313 first_stmt_info_for_drptr
9314 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9316 /* Check if the chain of loads is already vectorized. */
9317 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9318 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9319 ??? But we can only do so if there is exactly one
9320 as we have no way to get at the rest. Leave the CSE
9322 ??? With the group load eventually participating
9323 in multiple different permutations (having multiple
9324 slp nodes which refer to the same group) the CSE
9325 is even wrong code. See PR56270. */
9328 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9331 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9334 /* VEC_NUM is the number of vect stmts to be created for this group. */
9337 grouped_load
= false;
	  /* If an SLP permutation is from N elements to N elements,
	     and if one vector holds a whole number of N, we can load
	     the inputs to the permutation in the same way as an
	     unpermuted sequence.  In other cases we need to load the
	     whole group, not only the number of vector stmts the
	     permutation result fits in.  */
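	  /* E.g. (purely illustrative numbers): for a load permutation over
	     a group of 3 lanes with V4SI vectors (nunits = 4) and VF = 4,
	     one vector does not hold a whole number of groups, so we load
	     the whole group: vec_num = CEIL (3 * 4, 4) = 3 vectors, even if
	     the permutation result itself would fit in fewer.  */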
9344 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9346 && (group_size
!= scalar_lanes
9347 || !multiple_p (nunits
, group_size
)))
9349 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9350 variable VF; see vect_transform_slp_perm_load. */
9351 unsigned int const_vf
= vf
.to_constant ();
9352 unsigned int const_nunits
= nunits
.to_constant ();
9353 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9354 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9358 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9360 = group_size
- scalar_lanes
;
9364 vec_num
= group_size
;
9366 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9370 first_stmt_info
= stmt_info
;
9371 first_dr_info
= dr_info
;
9372 group_size
= vec_num
= 1;
9374 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9376 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9379 gcc_assert (alignment_support_scheme
);
9380 vec_loop_masks
*loop_masks
9381 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9382 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9384 vec_loop_lens
*loop_lens
9385 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9386 ? &LOOP_VINFO_LENS (loop_vinfo
)
9389 /* Shouldn't go with length-based approach if fully masked. */
9390 gcc_assert (!loop_lens
|| !loop_masks
);
9392 /* Targets with store-lane instructions must not require explicit
9393 realignment. vect_supportable_dr_alignment always returns either
9394 dr_aligned or dr_unaligned_supported for masked operations. */
9395 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9398 || alignment_support_scheme
== dr_aligned
9399 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
9501 if (nested_in_vect_loop
9502 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9503 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9505 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9506 compute_in_loop
= true;
9509 bool diff_first_stmt_info
9510 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9512 tree offset
= NULL_TREE
;
9513 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9514 || alignment_support_scheme
== dr_explicit_realign
)
9515 && !compute_in_loop
)
9517 /* If we have different first_stmt_info, we can't set up realignment
9518 here, since we can't guarantee first_stmt_info DR has been
9519 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9520 distance from first_stmt_info DR instead as below. */
9521 if (!diff_first_stmt_info
)
9522 msq
= vect_setup_realignment (vinfo
,
9523 first_stmt_info
, gsi
, &realignment_token
,
9524 alignment_support_scheme
, NULL_TREE
,
9526 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9528 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9529 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9531 gcc_assert (!first_stmt_info_for_drptr
);
9537 if (!known_eq (poffset
, 0))
9539 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9540 : size_int (poffset
));
9543 tree vec_offset
= NULL_TREE
;
9544 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9546 aggr_type
= NULL_TREE
;
9549 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9551 aggr_type
= elem_type
;
9552 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9553 &bump
, &vec_offset
);
9557 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9558 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9560 aggr_type
= vectype
;
9561 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9562 memory_access_type
);
9565 auto_vec
<tree
> vec_offsets
;
9566 auto_vec
<tree
> vec_masks
;
9570 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9573 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9574 &vec_masks
, mask_vectype
);
9576 tree vec_mask
= NULL_TREE
;
9577 poly_uint64 group_elt
= 0;
9578 for (j
= 0; j
< ncopies
; j
++)
9580 /* 1. Create the vector or array pointer update chain. */
9583 bool simd_lane_access_p
9584 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9585 if (simd_lane_access_p
9586 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9587 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9588 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9589 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9590 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9591 get_alias_set (TREE_TYPE (ref_type
)))
9592 && (alignment_support_scheme
== dr_aligned
9593 || alignment_support_scheme
== dr_unaligned_supported
))
9595 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9596 dataref_offset
= build_int_cst (ref_type
, 0);
9598 else if (diff_first_stmt_info
)
9601 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9602 aggr_type
, at_loop
, offset
, &dummy
,
9603 gsi
, &ptr_incr
, simd_lane_access_p
,
9605 /* Adjust the pointer by the difference to first_stmt. */
9606 data_reference_p ptrdr
9607 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9609 = fold_convert (sizetype
,
9610 size_binop (MINUS_EXPR
,
9611 DR_INIT (first_dr_info
->dr
),
9613 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9615 if (alignment_support_scheme
== dr_explicit_realign
)
9617 msq
= vect_setup_realignment (vinfo
,
9618 first_stmt_info_for_drptr
, gsi
,
9620 alignment_support_scheme
,
9621 dataref_ptr
, &at_loop
);
9622 gcc_assert (!compute_in_loop
);
9625 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9627 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9628 slp_node
, &gs_info
, &dataref_ptr
,
9633 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9635 offset
, &dummy
, gsi
, &ptr_incr
,
9636 simd_lane_access_p
, bump
);
9638 vec_mask
= vec_masks
[0];
9643 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9645 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9646 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9649 vec_mask
= vec_masks
[j
];
9652 if (grouped_load
|| slp_perm
)
9653 dr_chain
.create (vec_num
);
9655 gimple
*new_stmt
= NULL
;
9656 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9660 vec_array
= create_vector_array (vectype
, vec_num
);
9662 tree final_mask
= NULL_TREE
;
9664 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9667 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9668 final_mask
, vec_mask
, gsi
);
		  /* Emit:  VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR,
						ALIAS_PTR, VEC_MASK).  */
9676 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9677 tree alias_ptr
= build_int_cst (ref_type
, align
);
9678 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9679 dataref_ptr
, alias_ptr
,
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9686 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9687 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9689 gimple_call_set_lhs (call
, vec_array
);
9690 gimple_call_set_nothrow (call
, true);
9691 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9694 /* Extract each vector into an SSA_NAME. */
9695 for (i
= 0; i
< vec_num
; i
++)
9697 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9699 dr_chain
.quick_push (new_temp
);
9702 /* Record the mapping between SSA_NAMEs and statements. */
9703 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9705 /* Record that VEC_ARRAY is now dead. */
9706 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9710 for (i
= 0; i
< vec_num
; i
++)
9712 tree final_mask
= NULL_TREE
;
9714 && memory_access_type
!= VMAT_INVARIANT
)
9715 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9717 vectype
, vec_num
* j
+ i
);
9719 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9720 final_mask
, vec_mask
, gsi
);
9722 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9723 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9724 gsi
, stmt_info
, bump
);
9726 /* 2. Create the vector-load in the loop. */
9727 switch (alignment_support_scheme
)
9730 case dr_unaligned_supported
:
9732 unsigned int misalign
;
9733 unsigned HOST_WIDE_INT align
;
9735 if (memory_access_type
== VMAT_GATHER_SCATTER
9736 && gs_info
.ifn
!= IFN_LAST
)
9738 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9739 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9740 tree zero
= build_zero_cst (vectype
);
9741 tree scale
= size_int (gs_info
.scale
);
9744 call
= gimple_build_call_internal
9745 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9746 vec_offset
, scale
, zero
, final_mask
);
9748 call
= gimple_build_call_internal
9749 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9750 vec_offset
, scale
, zero
);
9751 gimple_call_set_nothrow (call
, true);
9753 data_ref
= NULL_TREE
;
9756 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9758 /* Emulated gather-scatter. */
9759 gcc_assert (!final_mask
);
9760 unsigned HOST_WIDE_INT const_nunits
9761 = nunits
.to_constant ();
9762 unsigned HOST_WIDE_INT const_offset_nunits
9763 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9765 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9766 vec_alloc (ctor_elts
, const_nunits
);
9767 gimple_seq stmts
= NULL
;
		  /* We support offset vectors with more elements
		     than the data vector for now.  */
9770 unsigned HOST_WIDE_INT factor
9771 = const_offset_nunits
/ const_nunits
;
9772 vec_offset
= vec_offsets
[j
/ factor
];
9773 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9774 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9775 tree scale
= size_int (gs_info
.scale
);
9777 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9778 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9780 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9782 tree boff
= size_binop (MULT_EXPR
,
9783 TYPE_SIZE (idx_type
),
9786 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9787 idx_type
, vec_offset
,
9788 TYPE_SIZE (idx_type
),
9790 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9791 idx
= gimple_build (&stmts
, MULT_EXPR
,
9792 sizetype
, idx
, scale
);
9793 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9794 TREE_TYPE (dataref_ptr
),
9796 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9797 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9798 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9799 build_int_cst (ref_type
, 0));
9800 new_stmt
= gimple_build_assign (elt
, ref
);
9801 gimple_set_vuse (new_stmt
,
9802 gimple_vuse (gsi_stmt (*gsi
)));
9803 gimple_seq_add_stmt (&stmts
, new_stmt
);
9804 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9806 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9807 new_stmt
= gimple_build_assign (NULL_TREE
,
9809 (vectype
, ctor_elts
));
9810 data_ref
= NULL_TREE
;
9815 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9816 if (alignment_support_scheme
== dr_aligned
)
9818 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9820 align
= dr_alignment
9821 (vect_dr_behavior (vinfo
, first_dr_info
));
9825 misalign
= misalignment
;
9826 if (dataref_offset
== NULL_TREE
9827 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9828 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9830 align
= least_bit_hwi (misalign
| align
);
9834 tree ptr
= build_int_cst (ref_type
,
9835 align
* BITS_PER_UNIT
);
9837 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9840 gimple_call_set_nothrow (call
, true);
9842 data_ref
= NULL_TREE
;
9844 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9847 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9850 tree ptr
= build_int_cst (ref_type
,
9851 align
* BITS_PER_UNIT
);
9853 machine_mode vmode
= TYPE_MODE (vectype
);
9854 opt_machine_mode new_ovmode
9855 = get_len_load_store_mode (vmode
, true);
9856 machine_mode new_vmode
= new_ovmode
.require ();
9857 tree qi_type
= unsigned_intQI_type_node
;
9859 signed char biasval
=
9860 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9862 tree bias
= build_int_cst (intQI_type_node
, biasval
);
9865 = gimple_build_call_internal (IFN_LEN_LOAD
, 4,
9868 gimple_call_set_nothrow (call
, true);
9870 data_ref
= NULL_TREE
;
9872 /* Need conversion if it's wrapped with VnQI. */
9873 if (vmode
!= new_vmode
)
9876 = build_vector_type_for_mode (qi_type
, new_vmode
);
9877 tree var
= vect_get_new_ssa_name (new_vtype
,
9879 gimple_set_lhs (call
, var
);
9880 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9882 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9884 = gimple_build_assign (vec_dest
,
9885 VIEW_CONVERT_EXPR
, op
);
9890 tree ltype
= vectype
;
9891 tree new_vtype
= NULL_TREE
;
9892 unsigned HOST_WIDE_INT gap
9893 = DR_GROUP_GAP (first_stmt_info
);
9894 unsigned int vect_align
9895 = vect_known_alignment_in_bytes (first_dr_info
,
9897 unsigned int scalar_dr_size
9898 = vect_get_scalar_dr_size (first_dr_info
);
		/* If there's no peeling for gaps but we have a gap
		   with slp loads then load the lower half of the
		   vector only.  See get_group_load_store_type for
		   when we apply this optimization.  */
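		/* Illustrative example (values assumed, not taken from the
		   surrounding code): with V4SI vectors, a group of size 4
		   and a gap of 2, only the two low elements are actually
		   accessed, so the code below can build LTYPE as a V2SI
		   half-vector, load just that half, pad the upper half with
		   zeros and view-convert the composition back to V4SI,
		   never dereferencing the gap elements.  */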
9905 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9907 && known_eq (nunits
, (group_size
- gap
) * 2)
9908 && known_eq (nunits
, group_size
)
9909 && gap
>= (vect_align
/ scalar_dr_size
))
9913 = vector_vector_composition_type (vectype
, 2,
9915 if (new_vtype
!= NULL_TREE
)
9919 = (dataref_offset
? dataref_offset
9920 : build_int_cst (ref_type
, 0));
9921 if (ltype
!= vectype
9922 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9924 unsigned HOST_WIDE_INT gap_offset
9925 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9926 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9927 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9930 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9931 if (alignment_support_scheme
== dr_aligned
)
9934 TREE_TYPE (data_ref
)
9935 = build_aligned_type (TREE_TYPE (data_ref
),
9936 align
* BITS_PER_UNIT
);
9937 if (ltype
!= vectype
)
9939 vect_copy_ref_info (data_ref
,
9940 DR_REF (first_dr_info
->dr
));
9941 tree tem
= make_ssa_name (ltype
);
9942 new_stmt
= gimple_build_assign (tem
, data_ref
);
9943 vect_finish_stmt_generation (vinfo
, stmt_info
,
9946 vec
<constructor_elt
, va_gc
> *v
;
9948 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9950 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9951 build_zero_cst (ltype
));
9952 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9956 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9957 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9958 build_zero_cst (ltype
));
9960 gcc_assert (new_vtype
!= NULL_TREE
);
9961 if (new_vtype
== vectype
)
9962 new_stmt
= gimple_build_assign (
9963 vec_dest
, build_constructor (vectype
, v
));
9966 tree new_vname
= make_ssa_name (new_vtype
);
9967 new_stmt
= gimple_build_assign (
9968 new_vname
, build_constructor (new_vtype
, v
));
9969 vect_finish_stmt_generation (vinfo
, stmt_info
,
9971 new_stmt
= gimple_build_assign (
9972 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9979 case dr_explicit_realign
:
9983 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9985 if (compute_in_loop
)
9986 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9988 dr_explicit_realign
,
9991 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9992 ptr
= copy_ssa_name (dataref_ptr
);
9994 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9995 // For explicit realign the target alignment should be
9996 // known at compile time.
9997 unsigned HOST_WIDE_INT align
=
9998 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9999 new_stmt
= gimple_build_assign
10000 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
10002 (TREE_TYPE (dataref_ptr
),
10003 -(HOST_WIDE_INT
) align
));
10004 vect_finish_stmt_generation (vinfo
, stmt_info
,
10007 = build2 (MEM_REF
, vectype
, ptr
,
10008 build_int_cst (ref_type
, 0));
10009 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10010 vec_dest
= vect_create_destination_var (scalar_dest
,
10012 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10013 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10014 gimple_assign_set_lhs (new_stmt
, new_temp
);
10015 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
10016 vect_finish_stmt_generation (vinfo
, stmt_info
,
10020 bump
= size_binop (MULT_EXPR
, vs
,
10021 TYPE_SIZE_UNIT (elem_type
));
10022 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
10023 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
10025 new_stmt
= gimple_build_assign
10026 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
10028 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
10029 if (TREE_CODE (ptr
) == SSA_NAME
)
10030 ptr
= copy_ssa_name (ptr
, new_stmt
);
10032 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
10033 gimple_assign_set_lhs (new_stmt
, ptr
);
10034 vect_finish_stmt_generation (vinfo
, stmt_info
,
10037 = build2 (MEM_REF
, vectype
, ptr
,
10038 build_int_cst (ref_type
, 0));
10041 case dr_explicit_realign_optimized
:
10043 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
10044 new_temp
= copy_ssa_name (dataref_ptr
);
10046 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
10047 // We should only be doing this if we know the target
10048 // alignment at compile time.
10049 unsigned HOST_WIDE_INT align
=
10050 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
10051 new_stmt
= gimple_build_assign
10052 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
10053 build_int_cst (TREE_TYPE (dataref_ptr
),
10054 -(HOST_WIDE_INT
) align
));
10055 vect_finish_stmt_generation (vinfo
, stmt_info
,
10058 = build2 (MEM_REF
, vectype
, new_temp
,
10059 build_int_cst (ref_type
, 0));
10063 gcc_unreachable ();
10065 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10066 /* DATA_REF is null if we've already built the statement. */
10069 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10070 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
10072 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10073 gimple_set_lhs (new_stmt
, new_temp
);
10074 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.
	     Create in loop:
	       vec_dest = realign_load (msq, lsq, realignment_token)  */
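	  /* Rough sketch of the scheme (illustrative only, see
	     vect_setup_realignment for the authoritative description):
	       msq = *(floor (p));		   aligned load below P
	       lsq = *(floor (p + VS - 1));	   aligned load above P
	       vec_dest = realign_load (msq, lsq, realignment_token);
	     i.e. two aligned loads plus a combining permute stand in for
	     one potentially misaligned vector load.  */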
10079 if (alignment_support_scheme
== dr_explicit_realign_optimized
10080 || alignment_support_scheme
== dr_explicit_realign
)
10082 lsq
= gimple_assign_lhs (new_stmt
);
10083 if (!realignment_token
)
10084 realignment_token
= dataref_ptr
;
10085 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10086 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
10087 msq
, lsq
, realignment_token
);
10088 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
10089 gimple_assign_set_lhs (new_stmt
, new_temp
);
10090 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10092 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10095 if (i
== vec_num
- 1 && j
== ncopies
- 1)
10096 add_phi_arg (phi
, lsq
,
10097 loop_latch_edge (containing_loop
),
10103 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
10105 tree perm_mask
= perm_mask_for_reverse (vectype
);
10106 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
10107 perm_mask
, stmt_info
, gsi
);
10108 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
	  /* Collect vector loads and later create their permutation in
	     vect_transform_grouped_load ().  */
10113 if (grouped_load
|| slp_perm
)
10114 dr_chain
.quick_push (new_temp
);
10116 /* Store vector loads in the corresponding SLP_NODE. */
10117 if (slp
&& !slp_perm
)
10118 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
	  /* With an SLP permutation we load the gaps as well; without one
	     we need to skip the gaps after we manage to fully load all
	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10123 group_elt
+= nunits
;
10124 if (maybe_ne (group_gap_adj
, 0U)
10126 && known_eq (group_elt
, group_size
- group_gap_adj
))
10128 poly_wide_int bump_val
10129 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10131 if (tree_int_cst_sgn
10132 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10133 bump_val
= -bump_val
;
10134 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10135 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10136 gsi
, stmt_info
, bump
);
      /* Bump the vector pointer to account for a gap or for excess
	 elements loaded for a permuted SLP load.  */
10142 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10144 poly_wide_int bump_val
10145 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10147 if (tree_int_cst_sgn
10148 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10149 bump_val
= -bump_val
;
10150 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10151 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10156 if (slp
&& !slp_perm
)
	  /* For SLP we know we've seen all possible uses of dr_chain so
	     direct vect_transform_slp_perm_load to DCE the unused parts.
	     ???  This is a hack to prevent compile-time issues as seen
	     in PR101120 and friends.  */
10166 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10167 gsi
, vf
, false, &n_perms
,
10175 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10176 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10178 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10182 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10185 dr_chain
.release ();
10188 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
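/* For instance (illustrative, SSA names invented), in

     _5 = a_2 < b_3;
     x_6 = _5 ? c_4 : d_1;

   the condition of the COND_EXPR may either be the embedded comparison
   a_2 < b_3 or the boolean SSA name _5; both forms are accepted here,
   and each comparison operand is checked with vect_is_simple_use.  */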
10207 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10208 slp_tree slp_node
, tree
*comp_vectype
,
10209 enum vect_def_type
*dts
, tree vectype
)
10212 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10216 if (TREE_CODE (cond
) == SSA_NAME
10217 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10219 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10220 &slp_op
, &dts
[0], comp_vectype
)
10222 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10227 if (!COMPARISON_CLASS_P (cond
))
10230 lhs
= TREE_OPERAND (cond
, 0);
10231 rhs
= TREE_OPERAND (cond
, 1);
10233 if (TREE_CODE (lhs
) == SSA_NAME
)
10235 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10236 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10239 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10240 || TREE_CODE (lhs
) == FIXED_CST
)
10241 dts
[0] = vect_constant_def
;
10245 if (TREE_CODE (rhs
) == SSA_NAME
)
10247 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10248 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10251 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10252 || TREE_CODE (rhs
) == FIXED_CST
)
10253 dts
[1] = vect_constant_def
;
10257 if (vectype1
&& vectype2
10258 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10259 TYPE_VECTOR_SUBPARTS (vectype2
)))
10262 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10263 /* Invariant comparison. */
10264 if (! *comp_vectype
)
10266 tree scalar_type
= TREE_TYPE (lhs
);
10267 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10268 *comp_vectype
= truth_type_for (vectype
);
10271 /* If we can widen the comparison to match vectype do so. */
10272 if (INTEGRAL_TYPE_P (scalar_type
)
10274 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10275 TYPE_SIZE (TREE_TYPE (vectype
))))
10276 scalar_type
= build_nonstandard_integer_type
10277 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10278 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
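/* As an illustration (SSA names invented, not produced verbatim by this
   function), the scalar statement

     iftmp_7 = a_3 < b_4 ? c_5 : d_6;

   is transformed into a vector comparison feeding a VEC_COND_EXPR:

     vec_cmp_9 = vect_a_1 < vect_b_2;
     vect_iftmp_10 = VEC_COND_EXPR <vec_cmp_9, vect_c_3, vect_d_4>;

   For EXTRACT_LAST_REDUCTION the select is instead emitted as a call
   to the internal function .FOLD_EXTRACT_LAST.  */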
10298 vectorizable_condition (vec_info
*vinfo
,
10299 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10301 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10303 tree scalar_dest
= NULL_TREE
;
10304 tree vec_dest
= NULL_TREE
;
10305 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10306 tree then_clause
, else_clause
;
10307 tree comp_vectype
= NULL_TREE
;
10308 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10309 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10312 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10313 enum vect_def_type dts
[4]
10314 = {vect_unknown_def_type
, vect_unknown_def_type
,
10315 vect_unknown_def_type
, vect_unknown_def_type
};
10319 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10321 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10322 vec
<tree
> vec_oprnds0
= vNULL
;
10323 vec
<tree
> vec_oprnds1
= vNULL
;
10324 vec
<tree
> vec_oprnds2
= vNULL
;
10325 vec
<tree
> vec_oprnds3
= vNULL
;
10327 bool masked
= false;
10329 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10332 /* Is vectorizable conditional operation? */
10333 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10337 code
= gimple_assign_rhs_code (stmt
);
10338 if (code
!= COND_EXPR
)
10341 stmt_vec_info reduc_info
= NULL
;
10342 int reduc_index
= -1;
10343 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10345 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10348 if (STMT_SLP_TYPE (stmt_info
))
10350 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10351 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10352 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10353 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10354 || reduc_index
!= -1);
10358 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10362 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10363 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10368 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10372 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10376 gcc_assert (ncopies
>= 1);
10377 if (for_reduction
&& ncopies
> 1)
10378 return false; /* FORNOW */
10380 cond_expr
= gimple_assign_rhs1 (stmt
);
10382 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10383 &comp_vectype
, &dts
[0], vectype
)
10387 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10388 slp_tree then_slp_node
, else_slp_node
;
10389 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10390 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10392 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10393 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10396 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10399 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10402 masked
= !COMPARISON_CLASS_P (cond_expr
);
10403 vec_cmp_type
= truth_type_for (comp_vectype
);
10405 if (vec_cmp_type
== NULL_TREE
)
10408 cond_code
= TREE_CODE (cond_expr
);
10411 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10412 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10415 /* For conditional reductions, the "then" value needs to be the candidate
10416 value calculated by this iteration while the "else" value needs to be
10417 the result carried over from previous iterations. If the COND_EXPR
10418 is the other way around, we need to swap it. */
10419 bool must_invert_cmp_result
= false;
10420 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10423 must_invert_cmp_result
= true;
10426 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10427 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10428 if (new_code
== ERROR_MARK
)
10429 must_invert_cmp_result
= true;
10432 cond_code
= new_code
;
10433 /* Make sure we don't accidentally use the old condition. */
10434 cond_expr
= NULL_TREE
;
10437 std::swap (then_clause
, else_clause
);
10440 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
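      /* For 0/1 boolean elements the rewrites below amount to (shown
	 only as an illustration of the bitop1/bitop2 encoding):
	   a >  b  ->  a & ~b
	   a >= b  ->  a | ~b
	   a == b  ->  ~(a ^ b)
	   a != b  ->  a ^ b
	 with LT/LE handled by swapping the operands first.  */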
10451 bitop1
= BIT_NOT_EXPR
;
10452 bitop2
= BIT_AND_EXPR
;
10455 bitop1
= BIT_NOT_EXPR
;
10456 bitop2
= BIT_IOR_EXPR
;
10459 bitop1
= BIT_NOT_EXPR
;
10460 bitop2
= BIT_AND_EXPR
;
10461 std::swap (cond_expr0
, cond_expr1
);
10464 bitop1
= BIT_NOT_EXPR
;
10465 bitop2
= BIT_IOR_EXPR
;
10466 std::swap (cond_expr0
, cond_expr1
);
10469 bitop1
= BIT_XOR_EXPR
;
10472 bitop1
= BIT_XOR_EXPR
;
10473 bitop2
= BIT_NOT_EXPR
;
10478 cond_code
= SSA_NAME
;
10481 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10482 && reduction_type
== EXTRACT_LAST_REDUCTION
10483 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10485 if (dump_enabled_p ())
10486 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10487 "reduction comparison operation not supported.\n");
10493 if (bitop1
!= NOP_EXPR
)
10495 machine_mode mode
= TYPE_MODE (comp_vectype
);
10498 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10499 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10502 if (bitop2
!= NOP_EXPR
)
10504 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10506 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10511 vect_cost_for_stmt kind
= vector_stmt
;
10512 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10513 /* Count one reduction-like operation per vector. */
10514 kind
= vec_to_scalar
;
10515 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10519 && (!vect_maybe_update_slp_op_vectype
10520 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10522 && !vect_maybe_update_slp_op_vectype
10523 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10524 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10525 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10527 if (dump_enabled_p ())
10528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10529 "incompatible vector types for invariants\n");
10533 if (loop_vinfo
&& for_reduction
10534 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10536 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10537 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10538 ncopies
* vec_num
, vectype
, NULL
);
10539 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10540 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10542 if (dump_enabled_p ())
10543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10544 "conditional reduction prevents the use"
10545 " of partial vectors.\n");
10546 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10550 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10551 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10559 scalar_dest
= gimple_assign_lhs (stmt
);
10560 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10561 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10563 bool swap_cond_operands
= false;
10565 /* See whether another part of the vectorized code applies a loop
10566 mask to the condition, or to its inverse. */
10568 vec_loop_masks
*masks
= NULL
;
10569 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10571 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10572 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10575 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10576 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10577 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10580 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10581 tree_code orig_code
= cond
.code
;
10582 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10583 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10585 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10586 cond_code
= cond
.code
;
10587 swap_cond_operands
= true;
10591 /* Try the inverse of the current mask. We check if the
10592 inverse mask is live and if so we generate a negate of
10593 the current mask such that we still honor NaNs. */
10594 cond
.inverted_p
= true;
10595 cond
.code
= orig_code
;
10596 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10598 bitop1
= orig_code
;
10599 bitop2
= BIT_NOT_EXPR
;
10600 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10601 cond_code
= cond
.code
;
10602 swap_cond_operands
= true;
10609 /* Handle cond expr. */
10611 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10612 cond_expr
, &vec_oprnds0
, comp_vectype
,
10613 then_clause
, &vec_oprnds2
, vectype
,
10614 reduction_type
!= EXTRACT_LAST_REDUCTION
10615 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10617 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10618 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10619 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10620 then_clause
, &vec_oprnds2
, vectype
,
10621 reduction_type
!= EXTRACT_LAST_REDUCTION
10622 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10624 /* Arguments are ready. Create the new vector stmt. */
10625 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10627 vec_then_clause
= vec_oprnds2
[i
];
10628 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10629 vec_else_clause
= vec_oprnds3
[i
];
10631 if (swap_cond_operands
)
10632 std::swap (vec_then_clause
, vec_else_clause
);
10635 vec_compare
= vec_cond_lhs
;
10638 vec_cond_rhs
= vec_oprnds1
[i
];
10639 if (bitop1
== NOP_EXPR
)
10641 gimple_seq stmts
= NULL
;
10642 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10643 vec_cond_lhs
, vec_cond_rhs
);
10644 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10648 new_temp
= make_ssa_name (vec_cmp_type
);
10650 if (bitop1
== BIT_NOT_EXPR
)
10651 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10655 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10657 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10658 if (bitop2
== NOP_EXPR
)
10659 vec_compare
= new_temp
;
10660 else if (bitop2
== BIT_NOT_EXPR
)
10662 /* Instead of doing ~x ? y : z do x ? z : y. */
10663 vec_compare
= new_temp
;
10664 std::swap (vec_then_clause
, vec_else_clause
);
10668 vec_compare
= make_ssa_name (vec_cmp_type
);
10670 = gimple_build_assign (vec_compare
, bitop2
,
10671 vec_cond_lhs
, new_temp
);
10672 vect_finish_stmt_generation (vinfo
, stmt_info
,
	  /* If we decided to apply a loop mask to the result of the vector
	     comparison, AND the comparison with the mask now.  Later passes
	     should then be able to reuse the AND results between multiple
	     vector comparisons.

	     For example:
	     for (int i = 0; i < 100; ++i)
	       x[i] = y[i] ? z[i] : 10;

	     results in following optimized GIMPLE:

	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
					       vect_iftmp.11_47, { 10, ... }>;

	     instead of using masked and unmasked forms of
	     vec != { 0, ... } (masked in the MASK_LOAD,
	     unmasked in the VEC_COND_EXPR).  */
10700 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10701 in cases where that's necessary. */
10703 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10705 if (!is_gimple_val (vec_compare
))
10707 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10708 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10710 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10711 vec_compare
= vec_compare_name
;
10714 if (must_invert_cmp_result
)
10716 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10717 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10720 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10721 vec_compare
= vec_compare_name
;
10727 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10729 tree tmp2
= make_ssa_name (vec_cmp_type
);
10731 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10733 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10734 vec_compare
= tmp2
;
10739 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10741 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10742 tree lhs
= gimple_get_lhs (old_stmt
);
10743 new_stmt
= gimple_build_call_internal
10744 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10746 gimple_call_set_lhs (new_stmt
, lhs
);
10747 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10748 if (old_stmt
== gsi_stmt (*gsi
))
10749 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10752 /* In this case we're moving the definition to later in the
10753 block. That doesn't matter because the only uses of the
10754 lhs are in phi statements. */
10755 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10756 gsi_remove (&old_gsi
, true);
10757 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10762 new_temp
= make_ssa_name (vec_dest
);
10763 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10764 vec_then_clause
, vec_else_clause
);
10765 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10768 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10770 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10774 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10776 vec_oprnds0
.release ();
10777 vec_oprnds1
.release ();
10778 vec_oprnds2
.release ();
10779 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
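/* Illustrative example (SSA names invented): the scalar statement

     cmp_5 = a_2 <= b_3;

   becomes a vector comparison producing a boolean/mask vector,

     mask_8 = vect_a_6 <= vect_b_7;

   or, when both operands are themselves boolean vectors, the equivalent
   bit operations selected via bitop1/bitop2 below.  */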
10793 vectorizable_comparison (vec_info
*vinfo
,
10794 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10796 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10798 tree lhs
, rhs1
, rhs2
;
10799 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10800 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10801 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10803 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10804 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10806 poly_uint64 nunits
;
10808 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10810 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10811 vec
<tree
> vec_oprnds0
= vNULL
;
10812 vec
<tree
> vec_oprnds1
= vNULL
;
10816 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10819 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10822 mask_type
= vectype
;
10823 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10828 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10830 gcc_assert (ncopies
>= 1);
10831 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10834 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10838 code
= gimple_assign_rhs_code (stmt
);
10840 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10843 slp_tree slp_rhs1
, slp_rhs2
;
10844 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10845 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10848 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10849 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10852 if (vectype1
&& vectype2
10853 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10854 TYPE_VECTOR_SUBPARTS (vectype2
)))
10857 vectype
= vectype1
? vectype1
: vectype2
;
10859 /* Invariant comparison. */
10862 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10863 vectype
= mask_type
;
10865 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10867 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10870 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10873 /* Can't compare mask and non-mask types. */
10874 if (vectype1
&& vectype2
10875 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
10885 bool swap_p
= false;
10886 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10888 if (code
== GT_EXPR
)
10890 bitop1
= BIT_NOT_EXPR
;
10891 bitop2
= BIT_AND_EXPR
;
10893 else if (code
== GE_EXPR
)
10895 bitop1
= BIT_NOT_EXPR
;
10896 bitop2
= BIT_IOR_EXPR
;
10898 else if (code
== LT_EXPR
)
10900 bitop1
= BIT_NOT_EXPR
;
10901 bitop2
= BIT_AND_EXPR
;
10904 else if (code
== LE_EXPR
)
10906 bitop1
= BIT_NOT_EXPR
;
10907 bitop2
= BIT_IOR_EXPR
;
10912 bitop1
= BIT_XOR_EXPR
;
10913 if (code
== EQ_EXPR
)
10914 bitop2
= BIT_NOT_EXPR
;
10920 if (bitop1
== NOP_EXPR
)
10922 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10927 machine_mode mode
= TYPE_MODE (vectype
);
10930 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10931 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10934 if (bitop2
!= NOP_EXPR
)
10936 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10937 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10942 /* Put types on constant and invariant SLP children. */
10944 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10945 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10947 if (dump_enabled_p ())
10948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10949 "incompatible vector types for invariants\n");
10953 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10954 vect_model_simple_cost (vinfo
, stmt_info
,
10955 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10956 dts
, ndts
, slp_node
, cost_vec
);
10963 lhs
= gimple_assign_lhs (stmt
);
10964 mask
= vect_create_destination_var (lhs
, mask_type
);
10966 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10967 rhs1
, &vec_oprnds0
, vectype
,
10968 rhs2
, &vec_oprnds1
, vectype
);
10970 std::swap (vec_oprnds0
, vec_oprnds1
);
10972 /* Arguments are ready. Create the new vector stmt. */
10973 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10976 vec_rhs2
= vec_oprnds1
[i
];
10978 new_temp
= make_ssa_name (mask
);
10979 if (bitop1
== NOP_EXPR
)
10981 new_stmt
= gimple_build_assign (new_temp
, code
,
10982 vec_rhs1
, vec_rhs2
);
10983 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10987 if (bitop1
== BIT_NOT_EXPR
)
10988 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10990 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10992 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10993 if (bitop2
!= NOP_EXPR
)
10995 tree res
= make_ssa_name (mask
);
10996 if (bitop2
== BIT_NOT_EXPR
)
10997 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10999 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
11001 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11005 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
11007 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11011 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11013 vec_oprnds0
.release ();
11014 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
11025 can_vectorize_live_stmts (vec_info
*vinfo
,
11026 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11027 slp_tree slp_node
, slp_instance slp_node_instance
,
11029 stmt_vector_for_cost
*cost_vec
)
11033 stmt_vec_info slp_stmt_info
;
11035 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
11037 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
11038 && !vectorizable_live_operation (vinfo
,
11039 slp_stmt_info
, gsi
, slp_node
,
11040 slp_node_instance
, i
,
11041 vec_stmt_p
, cost_vec
))
11045 else if (STMT_VINFO_LIVE_P (stmt_info
)
11046 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
11047 slp_node
, slp_node_instance
, -1,
11048 vec_stmt_p
, cost_vec
))
11054 /* Make sure the statement is vectorizable. */
11057 vect_analyze_stmt (vec_info
*vinfo
,
11058 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
11059 slp_tree node
, slp_instance node_instance
,
11060 stmt_vector_for_cost
*cost_vec
)
11062 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
11063 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
11065 gimple_seq pattern_def_seq
;
11067 if (dump_enabled_p ())
11068 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
11071 if (gimple_has_volatile_ops (stmt_info
->stmt
))
11072 return opt_result::failure_at (stmt_info
->stmt
,
11074 " stmt has volatile operands: %G\n",
11077 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11079 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
11081 gimple_stmt_iterator si
;
11083 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
11085 stmt_vec_info pattern_def_stmt_info
11086 = vinfo
->lookup_stmt (gsi_stmt (si
));
11087 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
11088 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
11090 /* Analyze def stmt of STMT if it's a pattern stmt. */
11091 if (dump_enabled_p ())
11092 dump_printf_loc (MSG_NOTE
, vect_location
,
11093 "==> examining pattern def statement: %G",
11094 pattern_def_stmt_info
->stmt
);
11097 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
11098 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; we don't analyze pattern stmts separately there, since the
     pattern stmts will already be part of the SLP instance.  */
11120 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
11121 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
11122 && !STMT_VINFO_LIVE_P (stmt_info
))
11124 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11125 && pattern_stmt_info
11126 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11127 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11129 /* Analyze PATTERN_STMT instead of the original stmt. */
11130 stmt_info
= pattern_stmt_info
;
11131 if (dump_enabled_p ())
11132 dump_printf_loc (MSG_NOTE
, vect_location
,
11133 "==> examining pattern statement: %G",
11138 if (dump_enabled_p ())
11139 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11141 return opt_result::success ();
11144 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11146 && pattern_stmt_info
11147 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11148 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11150 /* Analyze PATTERN_STMT too. */
11151 if (dump_enabled_p ())
11152 dump_printf_loc (MSG_NOTE
, vect_location
,
11153 "==> examining pattern statement: %G",
11154 pattern_stmt_info
->stmt
);
11157 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11158 node_instance
, cost_vec
);
11163 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11165 case vect_internal_def
:
11168 case vect_reduction_def
:
11169 case vect_nested_cycle
:
11170 gcc_assert (!bb_vinfo
11171 && (relevance
== vect_used_in_outer
11172 || relevance
== vect_used_in_outer_by_reduction
11173 || relevance
== vect_used_by_reduction
11174 || relevance
== vect_unused_in_scope
11175 || relevance
== vect_used_only_live
));
11178 case vect_induction_def
:
11179 gcc_assert (!bb_vinfo
);
11182 case vect_constant_def
:
11183 case vect_external_def
:
11184 case vect_unknown_def_type
:
11186 gcc_unreachable ();
11189 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11191 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11193 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11195 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11196 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11197 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11198 *need_to_vectorize
= true;
11201 if (PURE_SLP_STMT (stmt_info
) && !node
)
11203 if (dump_enabled_p ())
11204 dump_printf_loc (MSG_NOTE
, vect_location
,
11205 "handled only by SLP analysis\n");
11206 return opt_result::success ();
11211 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11212 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11213 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11214 -mveclibabi= takes preference over library functions with
11215 the simd attribute. */
11216 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11217 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11219 || vectorizable_conversion (vinfo
, stmt_info
,
11220 NULL
, NULL
, node
, cost_vec
)
11221 || vectorizable_operation (vinfo
, stmt_info
,
11222 NULL
, NULL
, node
, cost_vec
)
11223 || vectorizable_assignment (vinfo
, stmt_info
,
11224 NULL
, NULL
, node
, cost_vec
)
11225 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11226 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11227 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11228 node
, node_instance
, cost_vec
)
11229 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11230 NULL
, node
, cost_vec
)
11231 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11232 || vectorizable_condition (vinfo
, stmt_info
,
11233 NULL
, NULL
, node
, cost_vec
)
11234 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11236 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11237 stmt_info
, NULL
, node
));
11241 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11242 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11243 NULL
, NULL
, node
, cost_vec
)
11244 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11246 || vectorizable_shift (vinfo
, stmt_info
,
11247 NULL
, NULL
, node
, cost_vec
)
11248 || vectorizable_operation (vinfo
, stmt_info
,
11249 NULL
, NULL
, node
, cost_vec
)
11250 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11252 || vectorizable_load (vinfo
, stmt_info
,
11253 NULL
, NULL
, node
, cost_vec
)
11254 || vectorizable_store (vinfo
, stmt_info
,
11255 NULL
, NULL
, node
, cost_vec
)
11256 || vectorizable_condition (vinfo
, stmt_info
,
11257 NULL
, NULL
, node
, cost_vec
)
11258 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11260 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11264 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11267 return opt_result::failure_at (stmt_info
->stmt
,
11269 " relevant stmt not supported: %G",
11272 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11273 need extra handling, except for vectorizable reductions. */
11275 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11276 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11277 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11278 stmt_info
, NULL
, node
, node_instance
,
11280 return opt_result::failure_at (stmt_info
->stmt
,
11282 " live stmt not supported: %G",
11285 return opt_result::success ();
11289 /* Function vect_transform_stmt.
11291 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11294 vect_transform_stmt (vec_info
*vinfo
,
11295 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11296 slp_tree slp_node
, slp_instance slp_node_instance
)
11298 bool is_store
= false;
11299 gimple
*vec_stmt
= NULL
;
11302 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11304 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11306 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11308 switch (STMT_VINFO_TYPE (stmt_info
))
11310 case type_demotion_vec_info_type
:
11311 case type_promotion_vec_info_type
:
11312 case type_conversion_vec_info_type
:
11313 done
= vectorizable_conversion (vinfo
, stmt_info
,
11314 gsi
, &vec_stmt
, slp_node
, NULL
);
11318 case induc_vec_info_type
:
11319 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11320 stmt_info
, &vec_stmt
, slp_node
,
11325 case shift_vec_info_type
:
11326 done
= vectorizable_shift (vinfo
, stmt_info
,
11327 gsi
, &vec_stmt
, slp_node
, NULL
);
11331 case op_vec_info_type
:
11332 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11337 case assignment_vec_info_type
:
11338 done
= vectorizable_assignment (vinfo
, stmt_info
,
11339 gsi
, &vec_stmt
, slp_node
, NULL
);
11343 case load_vec_info_type
:
11344 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11349 case store_vec_info_type
:
11350 done
= vectorizable_store (vinfo
, stmt_info
,
11351 gsi
, &vec_stmt
, slp_node
, NULL
);
11353 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	     meanwhile.  */
11359 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11360 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11367 case condition_vec_info_type
:
11368 done
= vectorizable_condition (vinfo
, stmt_info
,
11369 gsi
, &vec_stmt
, slp_node
, NULL
);
11373 case comparison_vec_info_type
:
11374 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11379 case call_vec_info_type
:
11380 done
= vectorizable_call (vinfo
, stmt_info
,
11381 gsi
, &vec_stmt
, slp_node
, NULL
);
11384 case call_simd_clone_vec_info_type
:
11385 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11389 case reduc_vec_info_type
:
11390 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11391 gsi
, &vec_stmt
, slp_node
);
11395 case cycle_phi_info_type
:
11396 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11397 &vec_stmt
, slp_node
, slp_node_instance
);
11401 case lc_phi_info_type
:
11402 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11403 stmt_info
, &vec_stmt
, slp_node
);
11407 case phi_info_type
:
11408 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11413 if (!STMT_VINFO_LIVE_P (stmt_info
))
11415 if (dump_enabled_p ())
11416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11417 "stmt not supported.\n");
11418 gcc_unreachable ();
11423 if (!slp_node
&& vec_stmt
)
11424 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11426 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11428 /* Handle stmts whose DEF is used outside the loop-nest that is
11429 being vectorized. */
11430 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11431 slp_node_instance
, true, NULL
);
11436 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   those elements.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
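/* Usage sketch (hypothetical values, shown only for illustration):

     // Autodetect the preferred vector type for 'int'.
     tree v1 = get_related_vectype_for_scalar_type (VOIDmode,
						    integer_type_node, 0);
     // Ask for a type interoperable with a previously chosen 16-byte
     // vector mode (say V16QImode on targets that have it), e.g. a
     // 4-element int vector.
     tree v2 = get_related_vectype_for_scalar_type (V16QImode,
						    integer_type_node, 4);

   Either call returns NULL_TREE if the target cannot provide such a
   vector type.  */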
11476 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11477 tree scalar_type
, poly_uint64 nunits
)
11479 tree orig_scalar_type
= scalar_type
;
11480 scalar_mode inner_mode
;
11481 machine_mode simd_mode
;
11484 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11485 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11488 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
  /* Interoperability between modes requires one to be a constant multiple
     of the other, so that the number of vectors required for each operation
     is a compile-time constant.  */
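  /* For example (illustrative numbers): with a 16-byte prevailing mode,
     a request for 8 elements of a 4-byte scalar gives 32 bytes, a
     constant 2x multiple, so it passes; a request whose size is neither
     a compile-time multiple nor a compile-time divisor of 16 bytes is
     rejected here.  */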
11493 if (prevailing_mode
!= VOIDmode
11494 && !constant_multiple_p (nunits
* nbytes
,
11495 GET_MODE_SIZE (prevailing_mode
))
11496 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
  /* For vector types of elements whose mode precision doesn't
     match their types' precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
11506 if (INTEGRAL_TYPE_P (scalar_type
)
11507 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11508 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11509 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11510 TYPE_UNSIGNED (scalar_type
));
11512 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11513 When the component mode passes the above test simply use a type
11514 corresponding to that mode. The theory is that any use that
11515 would cause problems with this will disable vectorization anyway. */
11516 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11517 && !INTEGRAL_TYPE_P (scalar_type
))
11518 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
11522 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11523 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11524 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
11528 if (scalar_type
== NULL_TREE
)
11531 /* If no prevailing mode was supplied, use the mode the target prefers.
11532 Otherwise lookup a vector mode based on the prevailing mode. */
11533 if (prevailing_mode
== VOIDmode
)
11535 gcc_assert (known_eq (nunits
, 0U));
11536 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11537 if (SCALAR_INT_MODE_P (simd_mode
))
11539 /* Traditional behavior is not to take the integer mode
11540 literally, but simply to use it as a way of determining
11541 the vector size. It is up to mode_for_vector to decide
11542 what the TYPE_MODE should be.
11544 Note that nunits == 1 is allowed in order to support single
11545 element vector types. */
11546 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11547 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11551 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11552 || !related_vector_mode (prevailing_mode
,
11553 inner_mode
, nunits
).exists (&simd_mode
))
11555 /* Fall back to using mode_for_vector, mostly in the hope of being
11556 able to use an integer mode. */
11557 if (known_eq (nunits
, 0U)
11558 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11561 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11565 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11567 /* In cases where the mode was chosen by mode_for_vector, check that
11568 the target actually supports the chosen mode, or that it at least
11569 allows the vector mode to be replaced by a like-sized integer. */
11570 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11571 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
11576 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11577 return build_qualified_type
11578 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
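/* Usage sketch (hypothetical, for illustration): during BB
   vectorization of an SLP group of four ints,

     tree vectype = get_vectype_for_scalar_type (vinfo, integer_type_node, 4);

   never returns a vector with more than four elements, even if the
   target would prefer a wider one; passing 0 for the group size removes
   that cap.  */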
11591 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11592 unsigned int group_size
)
11594 /* For BB vectorization, we should always have a group size once we've
11595 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11596 are tentative requests during things like early data reference
11597 analysis and pattern recognition. */
11598 if (is_a
<bb_vec_info
> (vinfo
))
11599 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11603 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11605 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11606 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11608 /* Register the natural choice of vector type, before the group size
11609 has been applied. */
11611 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11613 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11614 try again with an explicit number of elements. */
11617 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
11631 unsigned int nunits
= 1 << floor_log2 (group_size
);
11634 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11635 scalar_type
, nunits
);
11638 while (nunits
> 1 && !vectype
);
11644 /* Return the vector type corresponding to SCALAR_TYPE as supported
11645 by the target. NODE, if nonnull, is the SLP tree node that will
11646 use the returned vector type. */
11649 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11651 unsigned int group_size
= 0;
11653 group_size
= SLP_TREE_LANES (node
);
11654 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
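
/* Illustration of the group-size capping above (hypothetical target): if
   the natural choice for "short" is an 8-element vector but GROUP_SIZE is
   3, maybe_ge (8, 3) holds, so the code retries with
   nunits = 1 << floor_log2 (3) = 2 and asks for a 2-element vector,
   halving further only if that request fails.  */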
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
                               unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
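
/* Illustration (hypothetical target): for SCALAR_TYPE "int" whose data
   vector type is a 4-element vector, truth_type_for yields a mask type
   with 4 boolean elements; depending on the target that may be a vector
   of 0/-1 integers or a scalar mask mode.  */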
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
                                              scalar_type, nunits);
}
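
/* Illustration: with VECTOR_TYPE a 16-byte "vector(4) float" and
   SCALAR_TYPE "double" (8 bytes), NUNITS becomes 16 / 8 = 2 and the
   result is a 2-element double vector, provided the target supports
   such a mode.  */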
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
        || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (operand))
        dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
        dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
        *dt = vect_external_def;
      else
        {
          stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
          def_stmt = stmt_vinfo->stmt;
          *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
          if (def_stmt_info_out)
            *def_stmt_info_out = stmt_vinfo;
        }
      if (def_stmt_out)
        *def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out,
                    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
                    unsigned operand, tree *op, slp_tree *slp_def,
                    enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
          return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
        }
      else
        {
          if (def_stmt_info_out)
            *def_stmt_info_out = NULL;
          *op = SLP_TREE_SCALAR_OPS (child)[0];
          *dt = SLP_TREE_DEF_TYPE (child);
          return true;
        }
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
        {
          if (gimple_assign_rhs_code (ass) == COND_EXPR
              && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
            {
              if (operand < 2)
                *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
              else
                *op = gimple_op (ass, operand);
            }
          else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
            *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
          else
            *op = gimple_op (ass, operand + 1);
        }
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
        *op = gimple_call_arg (call, operand);
      else
        gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype)
      && SLP_TREE_DEF_TYPE (op) == vect_external_def)
    return false;
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (vec_info *vinfo,
                                enum tree_code code, stmt_vec_info stmt_info,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow to change the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
          && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt_info, vectype_out,
                                             vectype_in, code1, code2,
                                             multi_step_cvt, interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have the
             same operation.  One such an example is s += a * b, where elements
             in a and b cannot be reordered.  Here we check if the vector defined
             by STMT is only directly used in the reduction statement.  */
          tree lhs = gimple_assign_lhs (stmt_info->stmt);
          stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
          if (use_stmt_info
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
           && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
           && VECTOR_BOOLEAN_TYPE_P (vectype)
           && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
           && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
         is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
        return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
        {
          /* If the input and result modes are the same, a different optab
             is needed where we pass in the number of units in vectype.  */
          optab3 = vec_unpacks_sbool_lo_optab;
          optab4 = vec_unpacks_sbool_hi_optab;
        }
      else
        {
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
        }

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
                        TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
            return true;
        }

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
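
/* Illustration (hypothetical target): widening a vector of "char" to
   "int" where only the VEC_UNPACK_LO/HI_EXPR pairs from char to short and
   from short to int are supported results in CODE1/CODE2 being the unpack
   codes, MULTI_STEP_CVT == 1 and INTERM_TYPES containing the short vector
   type, matching the char->short->int example in the comment above.  */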
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
          && VECTOR_BOOLEAN_TYPE_P (vectype)
          && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
          && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
          && n_elts < BITS_PER_UNIT)
        optab1 = vec_pack_sbool_trunc_optab;
      else
        optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
                    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
        return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && SCALAR_INT_MODE_P (prev_mode)
          && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
          && n_elts < BITS_PER_UNIT)
        interm_optab = vec_pack_sbool_trunc_optab;
      else
        interm_optab
          = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                                 optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
                        TYPE_VECTOR_SUBPARTS (narrow_vectype)))
            return true;
        }

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
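
/* Illustration (hypothetical target): narrowing a vector of "int" to
   "char" via VEC_PACK_TRUNC_EXPR typically goes through a short vector,
   so CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and INTERM_TYPES
   contains the short vector type (the int->short->char example in the
   comment above).  */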
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
                tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
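
/* Illustration: for a 4-element MASK_TYPE with START_INDEX 5 and
   END_INDEX 8, the IFN_WHILE_ULT result is { true, true, true, false },
   since 5 + 3 < 8 fails for the last lane.  */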
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                tree *stmt_vectype_out,
                                tree *nunits_vectype_out,
                                unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
        {
          /* Ignore calls with no lhs.  These must be calls to
             #pragma omp simd functions, and what vectorization factor
             it really needs can't be determined until
             vectorizable_simd_clone_call.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "defer to SIMD clone analysis.\n");
          return opt_result::success ();
        }

      return opt_result::failure_at (stmt,
                                     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt, "not vectorized: unsupported"
                                       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
        scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
        {
          if (group_size)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type (group size %d):"
                             " %T\n", group_size, scalar_type);
          else
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type: %T\n", scalar_type);
        }
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
                                       scalar_type);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
                                   "not vectorized: vector stmt in loop:%G",
                                   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
         type (or the largest vector size, but we only support one
         vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
                                                   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for smallest scalar type: %T\n",
                             scalar_type);
          nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
                                                        group_size);
          if (!nunits_vectype)
            return opt_result::failure_at
              (stmt, "not vectorized: unsupported data-type %T\n",
               scalar_type);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
                             nunits_vectype);
        }
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
                   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
                                   "Not vectorized: Incompatible number "
                                   "of vector subparts between %T and %T\n",
                                   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
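
/* Illustration (hypothetical target): for a conversion statement
   "int_var = (int) short_var" the statement vector type might be a
   4-element int vector while the smallest scalar type is "short", giving
   an 8-element short nunits vector type; the multiple_p check then
   verifies that 8 is a multiple of 4.  */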
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
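
/* Illustration: with START_INDEX 0, END_INDEX 13 and LEN_LIMIT 16 the
   sequence computes min (0, 13) = 0, left_len = 13 - 0 = 13 and
   LEN = min (13, 16) = 13; with END_INDEX 40 instead, LEN would be
   capped at the limit, 16.  */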