/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

static tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and their group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern.  don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the recognized forms (this should have been verified in
     analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
     STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant

   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                ...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                ...  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

        ...

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

static opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (int i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */
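
/* A rough worked example of the doubling rule above (illustrative only):
   with NCOPIES = 2 and PWR = 1, i.e. a two-step promotion, the narrowest
   step needs 2 vector stmts and the wider step needs twice as many, so
   about 2 + 4 = 6 operations end up being recorded in the loop body.  */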
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */
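
/* As a rough illustration of the permute accounting below: a grouped
   store with DR_GROUP_SIZE = 4 implemented via VMAT_CONTIGUOUS_PERMUTE
   records ncopies * ceil_log2 (4) * 4 = 8 * ncopies vec_perm operations
   in addition to the stores themselves.  */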
static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill the vector result.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Reload it piecewise.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          dr_misalignment (dr_info, vectype),
                                          vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
                      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
                      vect_memory_access_type memory_access_type,
                      gather_scatter_info *gs_info,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
                                    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
        {
          gaps -= 1;
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
        }
      while (next_stmt_info);
      if (gaps)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vect_model_load_cost: %d unused vectors.\n",
                             gaps);
          vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
                              &inside_cost, &prologue_cost,
                              cost_vec, cost_vec, true);
        }
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl)
        /* For emulated gathers N offset vector element extracts
           (we assume the scalar scaling and ptr + offset add is consumed by
            the load).  */
        inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
                                         vec_to_scalar, stmt_info, 0,
                                         vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          dr_misalignment (dr_info, vectype),
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }

    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */
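
/* Illustrative example: broadcasting the scalar constant 5 into a
   four-element integer vector type produces an INIT_STMT of the form
   cst_N = { 5, 5, 5, 5 }, inserted at GSI or in the loop preheader
   (the temporary name shown here is hypothetical).  */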
tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
                   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
                   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
                   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, vec_oprnds0, NULL_TREE,
                     op1, vec_oprnds1, NULL_TREE,
                     op2, vec_oprnds2, NULL_TREE,
                     op3, vec_oprnds3, NULL_TREE);
}
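
/* Illustrative usage sketch: a caller vectorizing a simple binary
   operation would typically request defs for just the first two
   operands, e.g.

     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1,
                        NULL_TREE, NULL, NULL_TREE, NULL);

   leaving the remaining operand slots null.  */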
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                        memory_access_type,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                   gs_info->memory_type,
                                                   gs_info->offset_vectype,
                                                   gs_info->scale))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
    {
      unsigned int nvectors;
      if (can_div_away_from_zero_p (size, nunits, &nvectors))
        return nvectors;
      gcc_unreachable ();
    };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because the"
                         " target doesn't have the appropriate partial"
                         " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
                         gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
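
/* Sketch of the precision check done below (illustrative numbers only):
   with DR_STEP X = 4, SCALE = 4 and at most COUNT = 255 elements, the
   largest offset value is 255 * 4 / 4 = 255, so an 8-bit unsigned offset
   type already represents every element of { 0, X, X*2, ... } without
   loss of precision.  */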
1823 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1824 loop_vec_info loop_vinfo
, bool masked_p
,
1825 gather_scatter_info
*gs_info
)
1827 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1828 data_reference
*dr
= dr_info
->dr
;
1829 tree step
= DR_STEP (dr
);
1830 if (TREE_CODE (step
) != INTEGER_CST
)
1832 /* ??? Perhaps we could use range information here? */
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_NOTE
, vect_location
,
1835 "cannot truncate variable step.\n");
1839 /* Get the number of bits in an element. */
1840 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1841 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1842 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1844 /* Set COUNT to the upper limit on the number of elements - 1.
1845 Start with the maximum vectorization factor. */
1846 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1848 /* Try lowering COUNT to the number of scalar latch iterations. */
1849 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1850 widest_int max_iters
;
1851 if (max_loop_iterations (loop
, &max_iters
)
1852 && max_iters
< count
)
1853 count
= max_iters
.to_shwi ();
1855 /* Try scales of 1 and the element size. */
1856 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1857 wi::overflow_type overflow
= wi::OVF_NONE
;
1858 for (int i
= 0; i
< 2; ++i
)
1860 int scale
= scales
[i
];
1862 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1865 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1866 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1869 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1870 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1872 /* Find the narrowest viable offset type. */
1873 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1874 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1877 /* See whether the target supports the operation with an offset
1878 no narrower than OFFSET_TYPE. */
1879 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1880 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1881 vectype
, memory_type
, offset_type
, scale
,
1882 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1883 || gs_info
->ifn
== IFN_LAST
)
1886 gs_info
->decl
= NULL_TREE
;
1887 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1888 but we don't need to store that here. */
1889 gs_info
->base
= NULL_TREE
;
1890 gs_info
->element_type
= TREE_TYPE (vectype
);
1891 gs_info
->offset
= fold_convert (offset_type
, step
);
1892 gs_info
->offset_dt
= vect_constant_def
;
1893 gs_info
->scale
= scale
;
1894 gs_info
->memory_type
= memory_type
;
1898 if (overflow
&& dump_enabled_p ())
1899 dump_printf_loc (MSG_NOTE
, vect_location
,
1900 "truncating gather/scatter offset to %d bits"
1901 " might change its value.\n", element_bits
);
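/* Illustrative sketch, not part of GCC: the precision computation above in
   plain integer arithmetic.  Given COUNT (the element limit minus one), STEP
   and SCALE, the offset vector is { 0, X, X*2, ... } with X = STEP / SCALE,
   so the narrowest usable offset type must hold COUNT * X; its width is then
   rounded up to a power of two.  The helper name is hypothetical and the
   sketch omits the overflow tracking that the wide-int code performs.  */

static unsigned int
example_min_offset_bits (unsigned long long count, long long step, int scale)
{
  long long factor = step / scale;   /* assumes STEP is a multiple of SCALE */
  long long range = (long long) count * factor;

  /* Minimum precision of RANGE: magnitude bits plus a sign bit if negative.  */
  unsigned long long magnitude
    = range < 0 ? 0ULL - (unsigned long long) range : (unsigned long long) range;
  unsigned int bits = 1;
  while ((magnitude >> bits) != 0)
    ++bits;
  if (range < 0)
    ++bits;

  /* Round up to a power of two, as with 1U << ceil_log2 (min_offset_bits).  */
  unsigned int offset_bits = 1;
  while (offset_bits < bits)
    offset_bits <<= 1;
  return offset_bits;
}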
1906 /* Return true if we can use gather/scatter internal functions to
1907 vectorize STMT_INFO, which is a grouped or strided load or store.
1908 MASKED_P is true if load or store is conditional. When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
1913 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1914 loop_vec_info loop_vinfo
, bool masked_p
,
1915 gather_scatter_info
*gs_info
)
1917 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1918 || gs_info
->ifn
== IFN_LAST
)
1919 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1922 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1923 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1925 gcc_assert (TYPE_PRECISION (new_offset_type
)
1926 >= TYPE_PRECISION (old_offset_type
));
1927 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1929 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_NOTE
, vect_location
,
1931 "using gather/scatter for strided/grouped access,"
1932 " scale = %d\n", gs_info
->scale
);
1937 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1938 elements with a known constant step. Return -1 if that step
1939 is negative, 0 if it is zero, and 1 if it is greater than zero. */
static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
}
1949 /* If the target supports a permute mask that reverses the elements in
1950 a vector of type VECTYPE, return that mask, otherwise return null. */
static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
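/* Illustrative sketch, not part of GCC: the selector that
   perm_mask_for_reverse encodes is simply { N-1, N-2, ..., 0 } for an N-lane
   vector; the vec_perm_builder above records only the first three elements
   because the remaining ones follow the same step.  The helper name is
   hypothetical.  */

static void
example_reverse_selector (unsigned int *sel, unsigned int nunits)
{
  for (unsigned int i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;   /* e.g. nunits == 4 gives { 3, 2, 1, 0 } */
}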
1968 /* A subroutine of get_load_store_type, with a subset of the same
1969 arguments. Handle the case where STMT_INFO is a load or store that
1970 accesses consecutive elements with a negative step. */
1972 static vect_memory_access_type
1973 get_negative_load_store_type (vec_info
*vinfo
,
1974 stmt_vec_info stmt_info
, tree vectype
,
1975 vec_load_store_type vls_type
,
1976 unsigned int ncopies
)
1978 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1979 dr_alignment_support alignment_support_scheme
;
1983 if (dump_enabled_p ())
1984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1985 "multiple types with negative step.\n");
1986 return VMAT_ELEMENTWISE
;
1989 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
, dr_info
,
1991 if (alignment_support_scheme
!= dr_aligned
1992 && alignment_support_scheme
!= dr_unaligned_supported
)
1994 if (dump_enabled_p ())
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1996 "negative step but alignment required.\n");
1997 return VMAT_ELEMENTWISE
;
2000 if (vls_type
== VLS_STORE_INVARIANT
)
2002 if (dump_enabled_p ())
2003 dump_printf_loc (MSG_NOTE
, vect_location
,
2004 "negative step with invariant source;"
2005 " no permute needed.\n");
2006 return VMAT_CONTIGUOUS_DOWN
;
2009 if (!perm_mask_for_reverse (vectype
))
2011 if (dump_enabled_p ())
2012 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2013 "negative step and reversing not supported.\n");
2014 return VMAT_ELEMENTWISE
;
2017 return VMAT_CONTIGUOUS_REVERSE
;
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the returned vector.  It first checks
   whether the target supports building the result from pieces of a
   piece-sized vector mode; if not, it then checks a piece-sized scalar mode.
   It returns NULL_TREE if no usable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */
2056 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2058 gcc_assert (VECTOR_TYPE_P (vtype
));
2059 gcc_assert (known_gt (nelts
, 0U));
2061 machine_mode vmode
= TYPE_MODE (vtype
);
2062 if (!VECTOR_MODE_P (vmode
))
2065 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2066 unsigned int pbsize
;
2067 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2069 /* First check if vec_init optab supports construction from
2070 vector pieces directly. */
2071 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2072 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2074 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2075 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2076 != CODE_FOR_nothing
))
2078 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
/* Otherwise check whether an integer type of the same piece size exists
   and whether vec_init optab supports construction from it directly.  */
2084 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2085 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2086 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2087 != CODE_FOR_nothing
))
2089 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2090 return build_vector_type (*ptype
, nelts
);
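/* Illustrative sketch, not part of GCC: the piece-size arithmetic used
   above.  For the V16QI, nelts == 4 example this gives 128 / 4 = 32 bits per
   piece, i.e. a V4QI vector piece (4 QImode elements) or an SImode integer
   piece.  The helper name is hypothetical.  */

static unsigned int
example_piece_bits (unsigned int vector_bits, unsigned int nelts,
                    unsigned int elem_bits, unsigned int *elems_per_piece)
{
  unsigned int piece_bits = vector_bits / nelts;  /* pbsize above */
  *elems_per_piece = piece_bits / elem_bits;      /* inelts above */
  return piece_bits;
}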
2097 /* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.
2101 For stores, the statements in the group are all consecutive
2102 and there is no gap at the end. For loads, the statements in the
2103 group might not be consecutive; there can be gaps between statements
2104 as well as at the end. */
2107 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2108 tree vectype
, slp_tree slp_node
,
2109 bool masked_p
, vec_load_store_type vls_type
,
2110 vect_memory_access_type
*memory_access_type
,
2111 dr_alignment_support
*alignment_support_scheme
,
2112 gather_scatter_info
*gs_info
)
2114 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2115 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2116 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2117 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2118 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2119 bool single_element_p
= (stmt_info
== first_stmt_info
2120 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2121 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2122 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2124 /* True if the vectorized statements would access beyond the last
2125 statement in the group. */
2126 bool overrun_p
= false;
2128 /* True if we can cope with such overrun by peeling for gaps, so that
2129 there is at least one final scalar iteration after the vector loop. */
2130 bool can_overrun_p
= (!masked_p
2131 && vls_type
== VLS_LOAD
2135 /* There can only be a gap at the end of the group if the stride is
2136 known at compile time. */
2137 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2139 /* Stores can't yet have gaps. */
2140 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2144 /* For SLP vectorization we directly vectorize a subchain
2145 without permutation. */
2146 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2148 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2149 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2151 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2152 separated by the stride, until we have a complete vector.
2153 Fall back to scalar accesses if that isn't possible. */
2154 if (multiple_p (nunits
, group_size
))
2155 *memory_access_type
= VMAT_STRIDED_SLP
;
2157 *memory_access_type
= VMAT_ELEMENTWISE
;
2161 overrun_p
= loop_vinfo
&& gap
!= 0;
2162 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2164 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2165 "Grouped store with gaps requires"
2166 " non-consecutive accesses\n");
2169 /* An overrun is fine if the trailing elements are smaller
2170 than the alignment boundary B. Every vector access will
2171 be a multiple of B and so we are guaranteed to access a
2172 non-gap element in the same B-sized block. */
2174 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2176 / vect_get_scalar_dr_size (first_dr_info
)))
2179 /* If the gap splits the vector in half and the target
2180 can do half-vector operations avoid the epilogue peeling
2181 by simply loading half of the vector only. Usually
2182 the construction with an upper zero half will be elided. */
2183 dr_alignment_support alignment_support_scheme
;
2187 && (((alignment_support_scheme
2188 = vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2191 || alignment_support_scheme
== dr_unaligned_supported
)
2192 && known_eq (nunits
, (group_size
- gap
) * 2)
2193 && known_eq (nunits
, group_size
)
2194 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2198 if (overrun_p
&& !can_overrun_p
)
2200 if (dump_enabled_p ())
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2202 "Peeling for outer loop is not supported\n");
2205 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2208 if (single_element_p
)
2209 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2210 only correct for single element "interleaving" SLP. */
2211 *memory_access_type
= get_negative_load_store_type
2212 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2215 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2216 separated by the stride, until we have a complete vector.
2217 Fall back to scalar accesses if that isn't possible. */
2218 if (multiple_p (nunits
, group_size
))
2219 *memory_access_type
= VMAT_STRIDED_SLP
;
2221 *memory_access_type
= VMAT_ELEMENTWISE
;
2226 gcc_assert (!loop_vinfo
|| cmp
> 0);
2227 *memory_access_type
= VMAT_CONTIGUOUS
;
2233 /* We can always handle this case using elementwise accesses,
2234 but see if something more efficient is available. */
2235 *memory_access_type
= VMAT_ELEMENTWISE
;
2237 /* If there is a gap at the end of the group then these optimizations
2238 would access excess elements in the last iteration. */
2239 bool would_overrun_p
= (gap
!= 0);
2240 /* An overrun is fine if the trailing elements are smaller than the
2241 alignment boundary B. Every vector access will be a multiple of B
2242 and so we are guaranteed to access a non-gap element in the
2243 same B-sized block. */
2246 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2247 / vect_get_scalar_dr_size (first_dr_info
)))
2248 would_overrun_p
= false;
2250 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2251 && (can_overrun_p
|| !would_overrun_p
)
2252 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
/* First cope with the degenerate case of a single-element vector.  */
2256 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2259 /* Otherwise try using LOAD/STORE_LANES. */
2260 else if (vls_type
== VLS_LOAD
2261 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2262 : vect_store_lanes_supported (vectype
, group_size
,
2265 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2266 overrun_p
= would_overrun_p
;
2269 /* If that fails, try using permuting loads. */
2270 else if (vls_type
== VLS_LOAD
2271 ? vect_grouped_load_supported (vectype
, single_element_p
,
2273 : vect_grouped_store_supported (vectype
, group_size
))
2275 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2276 overrun_p
= would_overrun_p
;
/* As a last resort, try using a gather load or scatter store.
2282 ??? Although the code can handle all group sizes correctly,
2283 it probably isn't a win to use separate strided accesses based
2284 on nearby locations. Or, even if it's a win over scalar code,
2285 it might not be a win over vectorizing at a lower VF, if that
2286 allows us to use contiguous accesses. */
2287 if (*memory_access_type
== VMAT_ELEMENTWISE
2290 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2292 *memory_access_type
= VMAT_GATHER_SCATTER
;
2295 if (*memory_access_type
== VMAT_GATHER_SCATTER
2296 || *memory_access_type
== VMAT_ELEMENTWISE
)
2297 *alignment_support_scheme
= dr_unaligned_supported
;
2299 *alignment_support_scheme
2300 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
, false);
2302 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2304 /* STMT is the leader of the group. Check the operands of all the
2305 stmts of the group. */
2306 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2307 while (next_stmt_info
)
2309 tree op
= vect_get_store_rhs (next_stmt_info
);
2310 enum vect_def_type dt
;
2311 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2315 "use not simple.\n");
2318 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2324 gcc_assert (can_overrun_p
);
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2327 "Data access with gaps requires scalar "
2329 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
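/* Illustrative sketch, not part of GCC: the "overrun is fine" test used in
   get_group_load_store_type.  Accessing past a trailing gap is safe when the
   gap is smaller than the number of scalar elements in one alignment block,
   because every vector access is then guaranteed to touch at least one
   non-gap element in the same block.  The helper name is hypothetical.  */

static int
example_gap_overrun_is_safe (unsigned int gap_elements,
                             unsigned int known_alignment_bytes,
                             unsigned int scalar_size_bytes)
{
  return gap_elements < known_alignment_bytes / scalar_size_bytes;
}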
2335 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2336 if there is a memory access type that the vectorized form can use,
2337 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2338 or scatters, fill in GS_INFO accordingly. In addition
2339 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2340 the target does not support the alignment scheme.
2342 SLP says whether we're performing SLP rather than loop vectorization.
2343 MASKED_P is true if the statement is conditional on a vectorized mask.
2344 VECTYPE is the vector type that the vectorized statements will use.
2345 NCOPIES is the number of vector statements that will be needed. */
2348 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2349 tree vectype
, slp_tree slp_node
,
2350 bool masked_p
, vec_load_store_type vls_type
,
2351 unsigned int ncopies
,
2352 vect_memory_access_type
*memory_access_type
,
2353 dr_alignment_support
*alignment_support_scheme
,
2354 gather_scatter_info
*gs_info
)
2356 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2357 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2358 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2360 *memory_access_type
= VMAT_GATHER_SCATTER
;
2361 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2363 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2364 &gs_info
->offset_dt
,
2365 &gs_info
->offset_vectype
))
2367 if (dump_enabled_p ())
2368 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2369 "%s index use not simple.\n",
2370 vls_type
== VLS_LOAD
? "gather" : "scatter");
2373 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2375 if (vls_type
!= VLS_LOAD
)
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2379 "unsupported emulated scatter.\n");
2382 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2383 || !TYPE_VECTOR_SUBPARTS
2384 (gs_info
->offset_vectype
).is_constant ()
2385 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2386 (gs_info
->offset_vectype
),
2387 TYPE_VECTOR_SUBPARTS (vectype
)))
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2391 "unsupported vector types for emulated "
2396 /* Gather-scatter accesses perform only component accesses, alignment
2397 is irrelevant for them. */
2398 *alignment_support_scheme
= dr_unaligned_supported
;
2400 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2402 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2404 vls_type
, memory_access_type
,
2405 alignment_support_scheme
, gs_info
))
2408 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2410 gcc_assert (!slp_node
);
2412 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2414 *memory_access_type
= VMAT_GATHER_SCATTER
;
2416 *memory_access_type
= VMAT_ELEMENTWISE
;
2417 /* Alignment is irrelevant here. */
2418 *alignment_support_scheme
= dr_unaligned_supported
;
2422 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2425 gcc_assert (vls_type
== VLS_LOAD
);
2426 *memory_access_type
= VMAT_INVARIANT
;
2427 /* Invariant accesses perform only component accesses, alignment
2428 is irrelevant for them. */
2429 *alignment_support_scheme
= dr_unaligned_supported
;
2434 *memory_access_type
= get_negative_load_store_type
2435 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2437 *memory_access_type
= VMAT_CONTIGUOUS
;
2438 *alignment_support_scheme
2439 = vect_supportable_dr_alignment (vinfo
,
2440 STMT_VINFO_DR_INFO (stmt_info
),
2445 if ((*memory_access_type
== VMAT_ELEMENTWISE
2446 || *memory_access_type
== VMAT_STRIDED_SLP
)
2447 && !nunits
.is_constant ())
2449 if (dump_enabled_p ())
2450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2451 "Not using elementwise accesses due to variable "
2452 "vectorization factor.\n");
2456 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2458 if (dump_enabled_p ())
2459 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2460 "unsupported unaligned access\n");
2464 /* FIXME: At the moment the cost model seems to underestimate the
2465 cost of using elementwise accesses. This check preserves the
2466 traditional behavior until that can be fixed. */
2467 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2468 if (!first_stmt_info
)
2469 first_stmt_info
= stmt_info
;
2470 if (*memory_access_type
== VMAT_ELEMENTWISE
2471 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2472 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2473 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2474 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2476 if (dump_enabled_p ())
2477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2478 "not falling back to elementwise accesses\n");
2484 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2485 conditional operation STMT_INFO. When returning true, store the mask
2486 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2487 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2488 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2491 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2492 slp_tree slp_node
, unsigned mask_index
,
2493 tree
*mask
, slp_tree
*mask_node
,
2494 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2496 enum vect_def_type mask_dt
;
2498 slp_tree mask_node_1
;
2499 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2500 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2502 if (dump_enabled_p ())
2503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2504 "mask use not simple.\n");
2508 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2512 "mask argument is not a boolean.\n");
/* If the caller is not prepared to adjust an external/constant
   SLP mask vector type, fail.  */
2520 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2522 if (dump_enabled_p ())
2523 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2524 "SLP mask argument is not vectorized.\n");
2528 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2530 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2532 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2536 "could not find an appropriate vector mask type.\n");
2540 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2541 TYPE_VECTOR_SUBPARTS (vectype
)))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2545 "vector mask type %T"
2546 " does not match vector data type %T.\n",
2547 mask_vectype
, vectype
);
2552 *mask_dt_out
= mask_dt
;
2553 *mask_vectype_out
= mask_vectype
;
2555 *mask_node
= mask_node_1
;
2559 /* Return true if stored value RHS is suitable for vectorizing store
2560 statement STMT_INFO. When returning true, store the type of the
2561 definition in *RHS_DT_OUT, the type of the vectorized store value in
2562 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2565 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2566 slp_tree slp_node
, tree rhs
,
2567 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2568 vec_load_store_type
*vls_type_out
)
/* If this is a store from a constant, make sure native_encode_expr
   can handle it.  */
2572 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2576 "cannot encode constant as a byte sequence.\n");
2581 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2583 if (gimple_call_internal_p (call
)
2584 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2585 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2588 enum vect_def_type rhs_dt
;
2591 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2592 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2596 "use not simple.\n");
2600 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2601 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2603 if (dump_enabled_p ())
2604 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2605 "incompatible vector types.\n");
2609 *rhs_dt_out
= rhs_dt
;
2610 *rhs_vectype_out
= rhs_vectype
;
2611 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2612 *vls_type_out
= VLS_STORE_INVARIANT
;
2614 *vls_type_out
= VLS_STORE
;
2618 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2619 Note that we support masks with floating-point type, in which case the
2620 floats are interpreted as a bitmask. */
2623 vect_build_all_ones_mask (vec_info
*vinfo
,
2624 stmt_vec_info stmt_info
, tree masktype
)
2626 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2627 return build_int_cst (masktype
, -1);
2628 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2630 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2631 mask
= build_vector_from_val (masktype
, mask
);
2632 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2634 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2638 for (int j
= 0; j
< 6; ++j
)
2640 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2641 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2642 mask
= build_vector_from_val (masktype
, mask
);
2643 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
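/* Illustrative sketch, not part of GCC: a floating-point "all ones" mask as
   built above is the all-one-bits pattern of the element reinterpreted as a
   float; targets that take FP masks look at the bits, not the numeric value.
   The helper name is hypothetical and assumes a 32-bit float.  */

static float
example_all_ones_float_mask (void)
{
  union { unsigned int bits; float value; } u;
  u.bits = 0xffffffffu;   /* every bit set; numerically this is a NaN */
  return u.value;
}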
2648 /* Build an all-zero merge value of type VECTYPE while vectorizing
2649 STMT_INFO as a gather load. */
2652 vect_build_zero_merge_argument (vec_info
*vinfo
,
2653 stmt_vec_info stmt_info
, tree vectype
)
2656 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2657 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2658 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2662 for (int j
= 0; j
< 6; ++j
)
2664 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2665 merge
= build_real (TREE_TYPE (vectype
), r
);
2669 merge
= build_vector_from_val (vectype
, merge
);
2670 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2673 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2674 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2675 the gather load operation. If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2680 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2681 gimple_stmt_iterator
*gsi
,
2683 gather_scatter_info
*gs_info
,
2686 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2687 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2688 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2689 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2690 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2691 edge pe
= loop_preheader_edge (loop
);
2692 enum { NARROW
, NONE
, WIDEN
} modifier
;
2693 poly_uint64 gather_off_nunits
2694 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2696 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2697 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2698 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2699 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2700 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2701 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2702 tree scaletype
= TREE_VALUE (arglist
);
2703 tree real_masktype
= masktype
;
2704 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2706 || TREE_CODE (masktype
) == INTEGER_TYPE
2707 || types_compatible_p (srctype
, masktype
)));
2708 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2709 masktype
= truth_type_for (srctype
);
2711 tree mask_halftype
= masktype
;
2712 tree perm_mask
= NULL_TREE
;
2713 tree mask_perm_mask
= NULL_TREE
;
2714 if (known_eq (nunits
, gather_off_nunits
))
2716 else if (known_eq (nunits
* 2, gather_off_nunits
))
2720 /* Currently widening gathers and scatters are only supported for
2721 fixed-length vectors. */
2722 int count
= gather_off_nunits
.to_constant ();
2723 vec_perm_builder
sel (count
, count
, 1);
2724 for (int i
= 0; i
< count
; ++i
)
2725 sel
.quick_push (i
| (count
/ 2));
2727 vec_perm_indices
indices (sel
, 1, count
);
2728 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2731 else if (known_eq (nunits
, gather_off_nunits
* 2))
2735 /* Currently narrowing gathers and scatters are only supported for
2736 fixed-length vectors. */
2737 int count
= nunits
.to_constant ();
2738 vec_perm_builder
sel (count
, count
, 1);
2739 sel
.quick_grow (count
);
2740 for (int i
= 0; i
< count
; ++i
)
2741 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2742 vec_perm_indices
indices (sel
, 2, count
);
2743 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2747 if (mask
&& masktype
== real_masktype
)
2749 for (int i
= 0; i
< count
; ++i
)
2750 sel
[i
] = i
| (count
/ 2);
2751 indices
.new_vector (sel
, 2, count
);
2752 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2755 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2760 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2761 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2763 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2764 if (!is_gimple_min_invariant (ptr
))
2767 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2768 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2769 gcc_assert (!new_bb
);
2772 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2774 tree vec_oprnd0
= NULL_TREE
;
2775 tree vec_mask
= NULL_TREE
;
2776 tree src_op
= NULL_TREE
;
2777 tree mask_op
= NULL_TREE
;
2778 tree prev_res
= NULL_TREE
;
2782 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2783 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2786 auto_vec
<tree
> vec_oprnds0
;
2787 auto_vec
<tree
> vec_masks
;
2788 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2789 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2790 gs_info
->offset
, &vec_oprnds0
);
2792 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2793 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2795 for (int j
= 0; j
< ncopies
; ++j
)
2798 if (modifier
== WIDEN
&& (j
& 1))
2799 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2800 perm_mask
, stmt_info
, gsi
);
2802 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2804 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2806 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2807 TYPE_VECTOR_SUBPARTS (idxtype
)));
2808 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2809 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2810 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2817 if (mask_perm_mask
&& (j
& 1))
2818 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2819 mask_perm_mask
, stmt_info
, gsi
);
2822 if (modifier
== NARROW
)
2825 vec_mask
= vec_masks
[j
/ 2];
2828 vec_mask
= vec_masks
[j
];
2831 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2833 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2834 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2835 gcc_assert (known_eq (sub1
, sub2
));
2836 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2837 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2839 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2840 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2844 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2846 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2848 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2849 : VEC_UNPACK_LO_EXPR
,
2851 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2857 tree mask_arg
= mask_op
;
2858 if (masktype
!= real_masktype
)
2860 tree utype
, optype
= TREE_TYPE (mask_op
);
2861 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2862 utype
= real_masktype
;
2864 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2865 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2866 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2868 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2869 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2871 if (!useless_type_conversion_p (real_masktype
, utype
))
2873 gcc_assert (TYPE_PRECISION (utype
)
2874 <= TYPE_PRECISION (real_masktype
));
2875 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2876 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2877 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2880 src_op
= build_zero_cst (srctype
);
2882 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2885 if (!useless_type_conversion_p (vectype
, rettype
))
2887 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2888 TYPE_VECTOR_SUBPARTS (rettype
)));
2889 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2890 gimple_call_set_lhs (new_stmt
, op
);
2891 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2892 var
= make_ssa_name (vec_dest
);
2893 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2894 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2895 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2899 var
= make_ssa_name (vec_dest
, new_stmt
);
2900 gimple_call_set_lhs (new_stmt
, var
);
2901 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2904 if (modifier
== NARROW
)
2911 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2913 new_stmt
= SSA_NAME_DEF_STMT (var
);
2916 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2918 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2921 /* Prepare the base and offset in GS_INFO for vectorization.
2922 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2923 to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
static void
vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
                             class loop *loop, stmt_vec_info stmt_info,
                             gather_scatter_info *gs_info,
                             tree *dataref_ptr, vec<tree> *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
  vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
                                 gs_info->offset, vec_offset,
                                 gs_info->offset_vectype);
}
2948 /* Prepare to implement a grouped or strided load or store using
2949 the gather load or scatter store operation described by GS_INFO.
2950 STMT_INFO is the load or store statement.
2952 Set *DATAREF_BUMP to the amount that should be added to the base
2953 address after each copy of the vectorized statement. Set *VEC_OFFSET
2954 to an invariant offset vector in which element I has the value
2955 I * DR_STEP / SCALE. */
static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
                                 loop_vec_info loop_vinfo,
                                 gather_scatter_info *gs_info,
                                 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  tree bump = size_binop (MULT_EXPR,
                          fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
                          ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
                             build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
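/* Illustrative sketch, not part of GCC: the invariant offset vector created
   above is the series { 0, X, X*2, X*3, ... } with X = DR_STEP / SCALE, so
   that element I of the gather/scatter addresses BASE + I * DR_STEP once the
   target applies SCALE.  The helper name is hypothetical.  */

static void
example_strided_offsets (long long *offsets, unsigned int nunits,
                         long long dr_step, int scale)
{
  long long x = dr_step / scale;   /* assumes DR_STEP is a multiple of SCALE */
  for (unsigned int i = 0; i < nunits; ++i)
    offsets[i] = (long long) i * x;
}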
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (vec_info *vinfo,
                             dr_vec_info *dr_info, tree aggr_type,
                             vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
3006 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3009 vectorizable_bswap (vec_info
*vinfo
,
3010 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3011 gimple
**vec_stmt
, slp_tree slp_node
,
3013 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3016 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3017 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3020 op
= gimple_call_arg (stmt
, 0);
3021 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3022 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3024 /* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
3030 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3032 gcc_assert (ncopies
>= 1);
3034 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3038 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3039 unsigned word_bytes
;
3040 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3043 /* The encoding uses one stepped pattern for each byte in the word. */
3044 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3045 for (unsigned i
= 0; i
< 3; ++i
)
3046 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3047 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3049 vec_perm_indices
indices (elts
, 1, num_bytes
);
3050 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3056 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3058 if (dump_enabled_p ())
3059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3060 "incompatible vector types for invariants\n");
3064 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3065 DUMP_VECT_SCOPE ("vectorizable_bswap");
3066 record_stmt_cost (cost_vec
,
3067 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3068 record_stmt_cost (cost_vec
,
3070 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3071 vec_perm
, stmt_info
, 0, vect_body
);
3075 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3078 vec
<tree
> vec_oprnds
= vNULL
;
3079 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
/* Arguments are ready.  Create the new vector stmt.  */
3084 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3087 tree tem
= make_ssa_name (char_vectype
);
3088 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3089 char_vectype
, vop
));
3090 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3091 tree tem2
= make_ssa_name (char_vectype
);
3092 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3093 tem
, tem
, bswap_vconst
);
3094 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3095 tem
= make_ssa_name (vectype
);
3096 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3098 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3100 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3102 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3106 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3108 vec_oprnds
.release ();
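/* Illustrative sketch, not part of GCC: the byte permutation built for bswap
   reverses the bytes within each word of the vector.  For word_bytes == 4
   and num_bytes == 16 it is { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }.
   The helper name is hypothetical.  */

static void
example_bswap_selector (unsigned int *sel, unsigned int num_bytes,
                        unsigned int word_bytes)
{
  for (unsigned int i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned int j = 0; j < word_bytes; ++j)
      sel[i * word_bytes + j] = (i + 1) * word_bytes - j - 1;
}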
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
3137 /* Function vectorizable_call.
3139 Check if STMT_INFO performs a function call that can be vectorized.
3140 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3141 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3142 Return true if STMT_INFO is vectorizable in this way. */
3145 vectorizable_call (vec_info
*vinfo
,
3146 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3147 gimple
**vec_stmt
, slp_tree slp_node
,
3148 stmt_vector_for_cost
*cost_vec
)
3154 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3155 tree vectype_out
, vectype_in
;
3156 poly_uint64 nunits_in
;
3157 poly_uint64 nunits_out
;
3158 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3159 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3160 tree fndecl
, new_temp
, rhs_type
;
3161 enum vect_def_type dt
[4]
3162 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3163 vect_unknown_def_type
};
3164 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3165 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3166 int ndts
= ARRAY_SIZE (dt
);
3168 auto_vec
<tree
, 8> vargs
;
3169 auto_vec
<tree
, 8> orig_vargs
;
3170 enum { NARROW
, NONE
, WIDEN
} modifier
;
3174 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3177 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3181 /* Is STMT_INFO a vectorizable call? */
3182 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3186 if (gimple_call_internal_p (stmt
)
3187 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3188 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3189 /* Handled by vectorizable_load and vectorizable_store. */
3192 if (gimple_call_lhs (stmt
) == NULL_TREE
3193 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3196 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3198 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3200 /* Process function arguments. */
3201 rhs_type
= NULL_TREE
;
3202 vectype_in
= NULL_TREE
;
3203 nargs
= gimple_call_num_args (stmt
);
/* Bail out if the function has more than four arguments; we do not have
   interesting builtin functions to vectorize with more than two arguments
   except for fma.  Having no arguments is also not good.  */
3208 if (nargs
== 0 || nargs
> 4)
3211 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3212 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3213 if (cfn
== CFN_GOMP_SIMD_LANE
)
3216 rhs_type
= unsigned_type_node
;
3220 if (internal_fn_p (cfn
))
3221 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3223 for (i
= 0; i
< nargs
; i
++)
3225 if ((int) i
== mask_opno
)
3227 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3228 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3233 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3234 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3238 "use not simple.\n");
3242 /* We can only handle calls with arguments of the same type. */
3244 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3248 "argument types differ.\n");
3252 rhs_type
= TREE_TYPE (op
);
3255 vectype_in
= vectypes
[i
];
3256 else if (vectypes
[i
]
3257 && !types_compatible_p (vectypes
[i
], vectype_in
))
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3261 "argument vector types differ.\n");
3265 /* If all arguments are external or constant defs, infer the vector type
3266 from the scalar type. */
3268 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3270 gcc_assert (vectype_in
);
3273 if (dump_enabled_p ())
3274 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3275 "no vectype for scalar type %T\n", rhs_type
);
3279 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3280 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3281 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3282 by a pack of the two vectors into an SI vector. We would need
3283 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3284 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3288 "mismatched vector sizes %T and %T\n",
3289 vectype_in
, vectype_out
);
3293 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3294 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3298 "mixed mask and nonmask vector types\n");
3303 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3304 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3305 if (known_eq (nunits_in
* 2, nunits_out
))
3307 else if (known_eq (nunits_out
, nunits_in
))
3309 else if (known_eq (nunits_out
* 2, nunits_in
))
3314 /* We only handle functions that do not read or clobber memory. */
3315 if (gimple_vuse (stmt
))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3319 "function reads from or writes to memory.\n");
3323 /* For now, we only vectorize functions if a target specific builtin
3324 is available. TODO -- in some cases, it might be profitable to
3325 insert the calls for pieces of the vector, in order to be able
3326 to vectorize other operations in the loop. */
3328 internal_fn ifn
= IFN_LAST
;
3329 tree callee
= gimple_call_fndecl (stmt
);
3331 /* First try using an internal function. */
3332 tree_code convert_code
= ERROR_MARK
;
3334 && (modifier
== NONE
3335 || (modifier
== NARROW
3336 && simple_integer_narrowing (vectype_out
, vectype_in
,
3338 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3341 /* If that fails, try asking for a target-specific built-in function. */
3342 if (ifn
== IFN_LAST
)
3344 if (cfn
!= CFN_LAST
)
3345 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3346 (cfn
, vectype_out
, vectype_in
);
3347 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3348 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3349 (callee
, vectype_out
, vectype_in
);
3352 if (ifn
== IFN_LAST
&& !fndecl
)
3354 if (cfn
== CFN_GOMP_SIMD_LANE
3357 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3358 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3359 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3360 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3362 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3363 { 0, 1, 2, ... vf - 1 } vector. */
3364 gcc_assert (nargs
== 0);
3366 else if (modifier
== NONE
3367 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3368 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3369 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3370 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3371 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3372 slp_op
, vectype_in
, cost_vec
);
3375 if (dump_enabled_p ())
3376 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3377 "function is not vectorizable.\n");
3384 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3385 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3387 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3389 /* Sanity check: make sure that at least one copy of the vectorized stmt
3390 needs to be generated. */
3391 gcc_assert (ncopies
>= 1);
3393 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3394 if (!vec_stmt
) /* transformation not required. */
3397 for (i
= 0; i
< nargs
; ++i
)
3398 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3400 if (dump_enabled_p ())
3401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3402 "incompatible vector types for invariants\n");
3405 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3406 DUMP_VECT_SCOPE ("vectorizable_call");
3407 vect_model_simple_cost (vinfo
, stmt_info
,
3408 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3409 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3410 record_stmt_cost (cost_vec
, ncopies
/ 2,
3411 vec_promote_demote
, stmt_info
, 0, vect_body
);
3413 if (loop_vinfo
&& mask_opno
>= 0)
3415 unsigned int nvectors
= (slp_node
3416 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3418 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3419 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3420 vectype_out
, scalar_mask
);
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3431 scalar_dest
= gimple_call_lhs (stmt
);
3432 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3434 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3436 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3438 tree prev_res
= NULL_TREE
;
3439 vargs
.safe_grow (nargs
, true);
3440 orig_vargs
.safe_grow (nargs
, true);
3441 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3442 for (j
= 0; j
< ncopies
; ++j
)
3444 /* Build argument list for the vectorized call. */
3447 vec
<tree
> vec_oprnds0
;
3449 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3450 vec_oprnds0
= vec_defs
[0];
3452 /* Arguments are ready. Create the new vector stmt. */
3453 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3456 for (k
= 0; k
< nargs
; k
++)
3458 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3459 vargs
[k
] = vec_oprndsk
[i
];
3462 if (modifier
== NARROW
)
/* We don't define any narrowing conditional functions at present.  */
3466 gcc_assert (mask_opno
< 0);
3467 tree half_res
= make_ssa_name (vectype_in
);
3469 = gimple_build_call_internal_vec (ifn
, vargs
);
3470 gimple_call_set_lhs (call
, half_res
);
3471 gimple_call_set_nothrow (call
, true);
3472 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3475 prev_res
= half_res
;
3478 new_temp
= make_ssa_name (vec_dest
);
3479 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3480 prev_res
, half_res
);
3481 vect_finish_stmt_generation (vinfo
, stmt_info
,
3486 if (mask_opno
>= 0 && masked_loop_p
)
3488 unsigned int vec_num
= vec_oprnds0
.length ();
3489 /* Always true for SLP. */
3490 gcc_assert (ncopies
== 1);
3491 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3493 vargs
[mask_opno
] = prepare_load_store_mask
3494 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3498 if (ifn
!= IFN_LAST
)
3499 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3501 call
= gimple_build_call_vec (fndecl
, vargs
);
3502 new_temp
= make_ssa_name (vec_dest
, call
);
3503 gimple_call_set_lhs (call
, new_temp
);
3504 gimple_call_set_nothrow (call
, true);
3505 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3508 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3513 for (i
= 0; i
< nargs
; i
++)
3515 op
= gimple_call_arg (stmt
, i
);
3518 vec_defs
.quick_push (vNULL
);
3519 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3523 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3526 if (mask_opno
>= 0 && masked_loop_p
)
3528 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3531 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3532 vargs
[mask_opno
], gsi
);
3536 if (cfn
== CFN_GOMP_SIMD_LANE
)
3538 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3540 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3541 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3542 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3543 new_temp
= make_ssa_name (vec_dest
);
3544 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3545 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3547 else if (modifier
== NARROW
)
/* We don't define any narrowing conditional functions at present.  */
3551 gcc_assert (mask_opno
< 0);
3552 tree half_res
= make_ssa_name (vectype_in
);
3553 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3554 gimple_call_set_lhs (call
, half_res
);
3555 gimple_call_set_nothrow (call
, true);
3556 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3559 prev_res
= half_res
;
3562 new_temp
= make_ssa_name (vec_dest
);
3563 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3564 prev_res
, half_res
);
3565 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3570 if (ifn
!= IFN_LAST
)
3571 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3573 call
= gimple_build_call_vec (fndecl
, vargs
);
3574 new_temp
= make_ssa_name (vec_dest
, call
);
3575 gimple_call_set_lhs (call
, new_temp
);
3576 gimple_call_set_nothrow (call
, true);
3577 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3581 if (j
== (modifier
== NARROW
? 1 : 0))
3582 *vec_stmt
= new_stmt
;
3583 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3585 for (i
= 0; i
< nargs
; i
++)
3587 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3588 vec_oprndsi
.release ();
3591 else if (modifier
== NARROW
)
3593 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3594 /* We don't define any narrowing conditional functions at present. */
3595 gcc_assert (mask_opno
< 0);
3596 for (j
= 0; j
< ncopies
; ++j
)
3598 /* Build argument list for the vectorized call. */
3600 vargs
.create (nargs
* 2);
3606 vec
<tree
> vec_oprnds0
;
3608 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3609 vec_oprnds0
= vec_defs
[0];
3611 /* Arguments are ready. Create the new vector stmt. */
3612 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3616 for (k
= 0; k
< nargs
; k
++)
3618 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3619 vargs
.quick_push (vec_oprndsk
[i
]);
3620 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3623 if (ifn
!= IFN_LAST
)
3624 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3626 call
= gimple_build_call_vec (fndecl
, vargs
);
3627 new_temp
= make_ssa_name (vec_dest
, call
);
3628 gimple_call_set_lhs (call
, new_temp
);
3629 gimple_call_set_nothrow (call
, true);
3630 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3631 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3636 for (i
= 0; i
< nargs
; i
++)
3638 op
= gimple_call_arg (stmt
, i
);
3641 vec_defs
.quick_push (vNULL
);
3642 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3643 op
, &vec_defs
[i
], vectypes
[i
]);
3645 vec_oprnd0
= vec_defs
[i
][2*j
];
3646 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3648 vargs
.quick_push (vec_oprnd0
);
3649 vargs
.quick_push (vec_oprnd1
);
3652 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3653 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3654 gimple_call_set_lhs (new_stmt
, new_temp
);
3655 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3657 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3661 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3663 for (i
= 0; i
< nargs
; i
++)
3665 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3666 vec_oprndsi
.release ();
3670 /* No current target implements this case. */
/* The call in STMT might prevent it from being removed in dce.  We however
   cannot remove it here, due to the way the SSA name it defines is mapped
   to the new definition.  So just replace the rhs of the statement with
   something harmless.  */
3683 stmt_info
= vect_orig_stmt (stmt_info
);
3684 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3687 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3688 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3694 struct simd_call_arg_info
3698 HOST_WIDE_INT linear_step
;
3699 enum vect_def_type dt
;
3701 bool simd_lane_linear
;
3704 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
3709 vect_simd_lane_linear (tree op
, class loop
*loop
,
3710 struct simd_call_arg_info
*arginfo
)
3712 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3714 if (!is_gimple_assign (def_stmt
)
3715 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3716 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3719 tree base
= gimple_assign_rhs1 (def_stmt
);
3720 HOST_WIDE_INT linear_step
= 0;
3721 tree v
= gimple_assign_rhs2 (def_stmt
);
3722 while (TREE_CODE (v
) == SSA_NAME
)
3725 def_stmt
= SSA_NAME_DEF_STMT (v
);
3726 if (is_gimple_assign (def_stmt
))
3727 switch (gimple_assign_rhs_code (def_stmt
))
3730 t
= gimple_assign_rhs2 (def_stmt
);
3731 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3733 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3734 v
= gimple_assign_rhs1 (def_stmt
);
3737 t
= gimple_assign_rhs2 (def_stmt
);
3738 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3740 linear_step
= tree_to_shwi (t
);
3741 v
= gimple_assign_rhs1 (def_stmt
);
3744 t
= gimple_assign_rhs1 (def_stmt
);
3745 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3746 || (TYPE_PRECISION (TREE_TYPE (v
))
3747 < TYPE_PRECISION (TREE_TYPE (t
))))
3756 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3758 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3759 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3764 arginfo
->linear_step
= linear_step
;
3766 arginfo
->simd_lane_linear
= true;
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
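/* For example, simd_clone_subparts returns 4 for a V4SF clone argument;
   the code below divides or multiplies by it to map between the loop's
   vectors and the clone's vector arguments.  */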
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
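/* In outline: the analysis below walks NODE->simd_clones and scores each
   candidate clone with a "badness" value -- more calls per vectorized
   iteration, an unneeded inbranch mask, a target-reported penalty and
   argument mismatches all make a clone less attractive -- and the chosen
   clone plus the linear-step info of its arguments is recorded in
   STMT_VINFO_SIMD_CLONE_INFO for the transform phase.  */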
static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_oprnd0 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    return false;
3848 /* Process function arguments. */
3849 nargs
= gimple_call_num_args (stmt
);
3851 /* Bail out if the function has zero arguments. */
3855 arginfo
.reserve (nargs
, true);
3857 for (i
= 0; i
< nargs
; i
++)
3859 simd_call_arg_info thisarginfo
;
3862 thisarginfo
.linear_step
= 0;
3863 thisarginfo
.align
= 0;
3864 thisarginfo
.op
= NULL_TREE
;
3865 thisarginfo
.simd_lane_linear
= false;
3867 op
= gimple_call_arg (stmt
, i
);
3868 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3869 &thisarginfo
.vectype
)
3870 || thisarginfo
.dt
== vect_uninitialized_def
)
3872 if (dump_enabled_p ())
3873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3874 "use not simple.\n");
3878 if (thisarginfo
.dt
== vect_constant_def
3879 || thisarginfo
.dt
== vect_external_def
)
3880 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3883 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3884 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3886 if (dump_enabled_p ())
3887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3888 "vector mask arguments are not supported\n");
3893 /* For linear arguments, the analyze phase should have saved
3894 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3895 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3896 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3898 gcc_assert (vec_stmt
);
3899 thisarginfo
.linear_step
3900 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3902 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3903 thisarginfo
.simd_lane_linear
3904 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3905 == boolean_true_node
);
3906 /* If loop has been peeled for alignment, we need to adjust it. */
3907 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3908 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3909 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3911 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3912 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3913 tree opt
= TREE_TYPE (thisarginfo
.op
);
3914 bias
= fold_convert (TREE_TYPE (step
), bias
);
3915 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3917 = fold_build2 (POINTER_TYPE_P (opt
)
3918 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3919 thisarginfo
.op
, bias
);
3923 && thisarginfo
.dt
!= vect_constant_def
3924 && thisarginfo
.dt
!= vect_external_def
3926 && TREE_CODE (op
) == SSA_NAME
3927 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3929 && tree_fits_shwi_p (iv
.step
))
3931 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3932 thisarginfo
.op
= iv
.base
;
3934 else if ((thisarginfo
.dt
== vect_constant_def
3935 || thisarginfo
.dt
== vect_external_def
)
3936 && POINTER_TYPE_P (TREE_TYPE (op
)))
3937 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3938 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3940 if (POINTER_TYPE_P (TREE_TYPE (op
))
3941 && !thisarginfo
.linear_step
3943 && thisarginfo
.dt
!= vect_constant_def
3944 && thisarginfo
.dt
!= vect_external_def
3947 && TREE_CODE (op
) == SSA_NAME
)
3948 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3950 arginfo
.quick_push (thisarginfo
);
3953 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3954 if (!vf
.is_constant ())
3956 if (dump_enabled_p ())
3957 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3958 "not considering SIMD clones; not yet supported"
3959 " for variable-width vectors.\n");
3963 unsigned int badness
= 0;
3964 struct cgraph_node
*bestn
= NULL
;
3965 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3966 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
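  /* Score each clone: exact_log2 (num_calls) * 4096 penalizes clones that
     need more calls per vectorized iteration, 8192 penalizes an inbranch
     (masked) clone for this unconditional call, and the target hook may
     add its own penalty; argument kind, step and alignment mismatches are
     checked further below.  */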
3968 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3969 n
= n
->simdclone
->next_clone
)
3971 unsigned int this_badness
= 0;
3972 unsigned int num_calls
;
3973 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
3974 || n
->simdclone
->nargs
!= nargs
)
3977 this_badness
+= exact_log2 (num_calls
) * 4096;
3978 if (n
->simdclone
->inbranch
)
3979 this_badness
+= 8192;
3980 int target_badness
= targetm
.simd_clone
.usable (n
);
3981 if (target_badness
< 0)
3983 this_badness
+= target_badness
* 512;
3984 /* FORNOW: Have to add code to add the mask argument. */
3985 if (n
->simdclone
->inbranch
)
3987 for (i
= 0; i
< nargs
; i
++)
3989 switch (n
->simdclone
->args
[i
].arg_type
)
3991 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3992 if (!useless_type_conversion_p
3993 (n
->simdclone
->args
[i
].orig_type
,
3994 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3996 else if (arginfo
[i
].dt
== vect_constant_def
3997 || arginfo
[i
].dt
== vect_external_def
3998 || arginfo
[i
].linear_step
)
4001 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4002 if (arginfo
[i
].dt
!= vect_constant_def
4003 && arginfo
[i
].dt
!= vect_external_def
)
4006 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4007 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4008 if (arginfo
[i
].dt
== vect_constant_def
4009 || arginfo
[i
].dt
== vect_external_def
4010 || (arginfo
[i
].linear_step
4011 != n
->simdclone
->args
[i
].linear_step
))
4014 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4015 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4016 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4017 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4018 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4019 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4023 case SIMD_CLONE_ARG_TYPE_MASK
:
4026 if (i
== (size_t) -1)
4028 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4033 if (arginfo
[i
].align
)
4034 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4035 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4037 if (i
== (size_t) -1)
4039 if (bestn
== NULL
|| this_badness
< badness
)
4042 badness
= this_badness
;
4049 for (i
= 0; i
< nargs
; i
++)
4050 if ((arginfo
[i
].dt
== vect_constant_def
4051 || arginfo
[i
].dt
== vect_external_def
)
4052 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4054 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4055 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4057 if (arginfo
[i
].vectype
== NULL
4058 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4059 simd_clone_subparts (arginfo
[i
].vectype
)))
4063 fndecl
= bestn
->decl
;
4064 nunits
= bestn
->simdclone
->simdlen
;
4065 ncopies
= vector_unroll_factor (vf
, nunits
);
4067 /* If the function isn't const, only allow it in simd loops where user
4068 has asserted that at least nunits consecutive iterations can be
4069 performed using SIMD instructions. */
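  /* E.g. "#pragma omp simd safelen(8)" sets loop->safelen to 8, which is
     enough for a non-const clone with simdlen 8 but not for one with
     simdlen 16.  */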
4070 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4071 && gimple_vuse (stmt
))
4074 /* Sanity check: make sure that at least one copy of the vectorized stmt
4075 needs to be generated. */
4076 gcc_assert (ncopies
>= 1);
4078 if (!vec_stmt
) /* transformation not required. */
4080 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4081 for (i
= 0; i
< nargs
; i
++)
4082 if ((bestn
->simdclone
->args
[i
].arg_type
4083 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4084 || (bestn
->simdclone
->args
[i
].arg_type
4085 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4087 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4090 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4091 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4092 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4093 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4095 tree sll
= arginfo
[i
].simd_lane_linear
4096 ? boolean_true_node
: boolean_false_node
;
4097 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4099 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4100 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4101 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4102 dt, slp_node, cost_vec); */
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4112 scalar_dest
= gimple_call_lhs (stmt
);
4113 vec_dest
= NULL_TREE
;
4118 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4119 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4120 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4123 rtype
= TREE_TYPE (ratype
);
4127 auto_vec
<vec
<tree
> > vec_oprnds
;
4128 auto_vec
<unsigned> vec_oprnds_i
;
4129 vec_oprnds
.safe_grow_cleared (nargs
, true);
4130 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4131 for (j
= 0; j
< ncopies
; ++j
)
4133 /* Build argument list for the vectorized call. */
4135 vargs
.create (nargs
);
4139 for (i
= 0; i
< nargs
; i
++)
4141 unsigned int k
, l
, m
, o
;
4143 op
= gimple_call_arg (stmt
, i
);
4144 switch (bestn
->simdclone
->args
[i
].arg_type
)
4146 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4147 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4148 o
= vector_unroll_factor (nunits
,
4149 simd_clone_subparts (atype
));
4150 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4152 if (simd_clone_subparts (atype
)
4153 < simd_clone_subparts (arginfo
[i
].vectype
))
4155 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4156 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4157 / simd_clone_subparts (atype
));
4158 gcc_assert ((k
& (k
- 1)) == 0);
4161 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4162 ncopies
* o
/ k
, op
,
4164 vec_oprnds_i
[i
] = 0;
4165 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4169 vec_oprnd0
= arginfo
[i
].op
;
4170 if ((m
& (k
- 1)) == 0)
4171 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4173 arginfo
[i
].op
= vec_oprnd0
;
4175 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4177 bitsize_int ((m
& (k
- 1)) * prec
));
4179 = gimple_build_assign (make_ssa_name (atype
),
4181 vect_finish_stmt_generation (vinfo
, stmt_info
,
4183 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4187 k
= (simd_clone_subparts (atype
)
4188 / simd_clone_subparts (arginfo
[i
].vectype
));
4189 gcc_assert ((k
& (k
- 1)) == 0);
4190 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4192 vec_alloc (ctor_elts
, k
);
4195 for (l
= 0; l
< k
; l
++)
4197 if (m
== 0 && l
== 0)
4199 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4203 vec_oprnds_i
[i
] = 0;
4204 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4207 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4208 arginfo
[i
].op
= vec_oprnd0
;
4211 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4215 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4219 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4221 = gimple_build_assign (make_ssa_name (atype
),
4223 vect_finish_stmt_generation (vinfo
, stmt_info
,
4225 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4228 vargs
.safe_push (vec_oprnd0
);
4231 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4233 = gimple_build_assign (make_ssa_name (atype
),
4235 vect_finish_stmt_generation (vinfo
, stmt_info
,
4237 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4242 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4243 vargs
.safe_push (op
);
4245 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4246 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4251 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4252 &stmts
, true, NULL_TREE
);
4256 edge pe
= loop_preheader_edge (loop
);
4257 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4258 gcc_assert (!new_bb
);
4260 if (arginfo
[i
].simd_lane_linear
)
4262 vargs
.safe_push (arginfo
[i
].op
);
4265 tree phi_res
= copy_ssa_name (op
);
4266 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4267 add_phi_arg (new_phi
, arginfo
[i
].op
,
4268 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4270 = POINTER_TYPE_P (TREE_TYPE (op
))
4271 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4272 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4273 ? sizetype
: TREE_TYPE (op
);
4275 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4277 tree tcst
= wide_int_to_tree (type
, cst
);
4278 tree phi_arg
= copy_ssa_name (op
);
4280 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4281 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4282 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4283 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4285 arginfo
[i
].op
= phi_res
;
4286 vargs
.safe_push (phi_res
);
4291 = POINTER_TYPE_P (TREE_TYPE (op
))
4292 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4293 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4294 ? sizetype
: TREE_TYPE (op
);
4296 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4298 tree tcst
= wide_int_to_tree (type
, cst
);
4299 new_temp
= make_ssa_name (TREE_TYPE (op
));
4301 = gimple_build_assign (new_temp
, code
,
4302 arginfo
[i
].op
, tcst
);
4303 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4304 vargs
.safe_push (new_temp
);
4307 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4308 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4309 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4310 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4311 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4312 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4318 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4322 || known_eq (simd_clone_subparts (rtype
), nunits
));
4324 new_temp
= create_tmp_var (ratype
);
4325 else if (useless_type_conversion_p (vectype
, rtype
))
4326 new_temp
= make_ssa_name (vec_dest
, new_call
);
4328 new_temp
= make_ssa_name (rtype
, new_call
);
4329 gimple_call_set_lhs (new_call
, new_temp
);
4331 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4332 gimple
*new_stmt
= new_call
;
4336 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4339 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4340 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4341 k
= vector_unroll_factor (nunits
,
4342 simd_clone_subparts (vectype
));
4343 gcc_assert ((k
& (k
- 1)) == 0);
4344 for (l
= 0; l
< k
; l
++)
4349 t
= build_fold_addr_expr (new_temp
);
4350 t
= build2 (MEM_REF
, vectype
, t
,
4351 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4354 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4355 bitsize_int (prec
), bitsize_int (l
* prec
));
4356 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4357 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4359 if (j
== 0 && l
== 0)
4360 *vec_stmt
= new_stmt
;
4361 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4365 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4368 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4370 unsigned int k
= (simd_clone_subparts (vectype
)
4371 / simd_clone_subparts (rtype
));
4372 gcc_assert ((k
& (k
- 1)) == 0);
4373 if ((j
& (k
- 1)) == 0)
4374 vec_alloc (ret_ctor_elts
, k
);
4378 o
= vector_unroll_factor (nunits
,
4379 simd_clone_subparts (rtype
));
4380 for (m
= 0; m
< o
; m
++)
4382 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4383 size_int (m
), NULL_TREE
, NULL_TREE
);
4384 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4386 vect_finish_stmt_generation (vinfo
, stmt_info
,
4388 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4389 gimple_assign_lhs (new_stmt
));
4391 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4394 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4395 if ((j
& (k
- 1)) != k
- 1)
4397 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4399 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4400 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4402 if ((unsigned) j
== k
- 1)
4403 *vec_stmt
= new_stmt
;
4404 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4409 tree t
= build_fold_addr_expr (new_temp
);
4410 t
= build2 (MEM_REF
, vectype
, t
,
4411 build_int_cst (TREE_TYPE (t
), 0));
4412 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4413 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4414 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4416 else if (!useless_type_conversion_p (vectype
, rtype
))
4418 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4420 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4421 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4426 *vec_stmt
= new_stmt
;
4427 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4430 for (i
= 0; i
< nargs
; ++i
)
4432 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4437 /* The call in STMT might prevent it from being removed in dce.
4438 We however cannot remove it here, due to the way the ssa name
4439 it defines is mapped to the new definition. So just replace
4440 rhs of the statement with something harmless. */
4448 type
= TREE_TYPE (scalar_dest
);
4449 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4450 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4453 new_stmt
= gimple_build_nop ();
4454 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4455 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}

/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code)
4505 tree vop0
, vop1
, new_tmp
, vec_dest
;
4507 vec_dest
= vec_dsts
.pop ();
4509 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4511 /* Create demotion operation. */
4512 vop0
= (*vec_oprnds
)[i
];
4513 vop1
= (*vec_oprnds
)[i
+ 1];
4514 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4515 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4516 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4517 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4520 /* Store the resulting vector for next recursive call. */
4521 (*vec_oprnds
)[i
/2] = new_tmp
;
4524 /* This is the last step of the conversion sequence. Store the
4525 vectors in SLP_NODE or in vector info of the scalar statement
4526 (or in STMT_VINFO_RELATED_STMT chain). */
4528 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4530 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4534 /* For multi-step demotion operations we first generate demotion operations
4535 from the source type to the intermediate types, and then combine the
4536 results (stored in VEC_OPRNDS) in demotion operation to the destination
4540 /* At each level of recursion we have half of the operands we had at the
4542 vec_oprnds
->truncate ((i
+1)/2);
4543 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4545 stmt_info
, vec_dsts
, gsi
,
4546 slp_node
, VEC_PACK_TRUNC_EXPR
);
4549 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}

/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */

static void
vect_create_half_widening_stmts (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1,
				 stmt_vec_info stmt_info, tree vec_dest,
				 gimple_stmt_iterator *gsi,
				 enum tree_code code1,
				 int op_type)
{
  int i;
  tree vop0, vop1;
  gimple *new_stmt1, *new_stmt2, *new_stmt3;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length ());
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      tree new_tmp1, new_tmp2, new_tmp3, out_type;

      gcc_assert (op_type == binary_op);
      vop1 = (*vec_oprnds1)[i];

      /* Widen the first vector input.  */
      out_type = TREE_TYPE (vec_dest);
      new_tmp1 = make_ssa_name (out_type);
      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
	{
	  /* Widen the second vector input.  */
	  new_tmp2 = make_ssa_name (out_type);
	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
	  /* Perform the operation.  With both vector inputs widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
	}
      else
	{
	  /* Perform the operation.  With the single vector input widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
	}

      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
      gimple_assign_set_lhs (new_stmt3, new_tmp3);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp3);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}

/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_conversion (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
4679 tree op0
, op1
= NULL_TREE
;
4680 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4681 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4682 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4684 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4686 poly_uint64 nunits_in
;
4687 poly_uint64 nunits_out
;
4688 tree vectype_out
, vectype_in
;
4690 tree lhs_type
, rhs_type
;
4691 enum { NARROW
, NONE
, WIDEN
} modifier
;
4692 vec
<tree
> vec_oprnds0
= vNULL
;
4693 vec
<tree
> vec_oprnds1
= vNULL
;
4695 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4696 int multi_step_cvt
= 0;
4697 vec
<tree
> interm_types
= vNULL
;
4698 tree intermediate_type
, cvt_type
= NULL_TREE
;
4700 unsigned short fltsz
;
4702 /* Is STMT a vectorizable conversion? */
4704 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4707 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4711 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4715 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4718 code
= gimple_assign_rhs_code (stmt
);
4719 if (!CONVERT_EXPR_CODE_P (code
)
4720 && code
!= FIX_TRUNC_EXPR
4721 && code
!= FLOAT_EXPR
4722 && code
!= WIDEN_PLUS_EXPR
4723 && code
!= WIDEN_MINUS_EXPR
4724 && code
!= WIDEN_MULT_EXPR
4725 && code
!= WIDEN_LSHIFT_EXPR
)
4728 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4729 || code
== WIDEN_MINUS_EXPR
4730 || code
== WIDEN_MULT_EXPR
4731 || code
== WIDEN_LSHIFT_EXPR
);
4732 op_type
= TREE_CODE_LENGTH (code
);
4734 /* Check types of lhs and rhs. */
4735 scalar_dest
= gimple_assign_lhs (stmt
);
4736 lhs_type
= TREE_TYPE (scalar_dest
);
4737 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4739 /* Check the operands of the operation. */
4740 slp_tree slp_op0
, slp_op1
= NULL
;
4741 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4742 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4746 "use not simple.\n");
4750 rhs_type
= TREE_TYPE (op0
);
4751 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4752 && !((INTEGRAL_TYPE_P (lhs_type
)
4753 && INTEGRAL_TYPE_P (rhs_type
))
4754 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4755 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4758 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4759 && ((INTEGRAL_TYPE_P (lhs_type
)
4760 && !type_has_mode_precision_p (lhs_type
))
4761 || (INTEGRAL_TYPE_P (rhs_type
)
4762 && !type_has_mode_precision_p (rhs_type
))))
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4766 "type conversion to/from bit-precision unsupported."
4771 if (op_type
== binary_op
)
4773 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4774 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4776 op1
= gimple_assign_rhs2 (stmt
);
4778 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4779 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4783 "use not simple.\n");
4786 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4789 vectype_in
= vectype1_in
;
4792 /* If op0 is an external or constant def, infer the vector type
4793 from the scalar type. */
4795 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4797 gcc_assert (vectype_in
);
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4802 "no vectype for scalar type %T\n", rhs_type
);
4807 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4808 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4812 "can't convert between boolean and non "
4813 "boolean vectors %T\n", rhs_type
);
4818 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4819 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4820 if (known_eq (nunits_out
, nunits_in
))
4825 else if (multiple_p (nunits_out
, nunits_in
))
4829 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4833 /* Multiple types in SLP are handled by creating the appropriate number of
4834 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4838 else if (modifier
== NARROW
)
4839 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4841 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4843 /* Sanity check: make sure that at least one copy of the vectorized stmt
4844 needs to be generated. */
4845 gcc_assert (ncopies
>= 1);
4847 bool found_mode
= false;
4848 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4849 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4850 opt_scalar_mode rhs_mode_iter
;
4852 /* Supportable by target? */
4856 if (code
!= FIX_TRUNC_EXPR
4857 && code
!= FLOAT_EXPR
4858 && !CONVERT_EXPR_CODE_P (code
))
4860 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4864 if (dump_enabled_p ())
4865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4866 "conversion not supported by target.\n");
4870 if (known_eq (nunits_in
, nunits_out
))
4872 if (!supportable_half_widening_operation (code
, vectype_out
,
4873 vectype_in
, &code1
))
4875 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4878 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
4879 vectype_out
, vectype_in
, &code1
,
4880 &code2
, &multi_step_cvt
,
4883 /* Binary widening operation can only be supported directly by the
4885 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4889 if (code
!= FLOAT_EXPR
4890 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4893 fltsz
= GET_MODE_SIZE (lhs_mode
);
4894 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4896 rhs_mode
= rhs_mode_iter
.require ();
4897 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4901 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4902 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4903 if (cvt_type
== NULL_TREE
)
4906 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4908 if (!supportable_convert_operation (code
, vectype_out
,
4909 cvt_type
, &codecvt1
))
4912 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4913 vectype_out
, cvt_type
,
4914 &codecvt1
, &codecvt2
,
4919 gcc_assert (multi_step_cvt
== 0);
4921 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4923 vectype_in
, &code1
, &code2
,
4924 &multi_step_cvt
, &interm_types
))
4934 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4935 codecvt2
= ERROR_MARK
;
4939 interm_types
.safe_push (cvt_type
);
4940 cvt_type
= NULL_TREE
;
4945 gcc_assert (op_type
== unary_op
);
4946 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4947 &code1
, &multi_step_cvt
,
4951 if (code
!= FIX_TRUNC_EXPR
4952 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4956 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4957 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4958 if (cvt_type
== NULL_TREE
)
4960 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4963 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4964 &code1
, &multi_step_cvt
,
4973 if (!vec_stmt
) /* transformation not required. */
4976 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4977 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4981 "incompatible vector types for invariants\n");
4984 DUMP_VECT_SCOPE ("vectorizable_conversion");
4985 if (modifier
== NONE
)
4987 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4988 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4991 else if (modifier
== NARROW
)
4993 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4994 /* The final packing step produces one vector result per copy. */
4995 unsigned int nvectors
4996 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4997 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4998 multi_step_cvt
, cost_vec
,
5003 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5004 /* The initial unpacking step produces two vector results
5005 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5006 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
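      /* E.g. with two intermediate steps (MULTI_STEP_CVT == 2) the
	 conversion takes three steps, and ">> 2" divides the number of
	 SLP vector stmts by 4 == 2^(3 - 1).  */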
5007 unsigned int nvectors
5009 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5011 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5012 multi_step_cvt
, cost_vec
,
5015 interm_types
.release ();
5020 if (dump_enabled_p ())
5021 dump_printf_loc (MSG_NOTE
, vect_location
,
5022 "transform conversion. ncopies = %d.\n", ncopies
);
5024 if (op_type
== binary_op
)
5026 if (CONSTANT_CLASS_P (op0
))
5027 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5028 else if (CONSTANT_CLASS_P (op1
))
5029 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5032 /* In case of multi-step conversion, we first generate conversion operations
5033 to the intermediate types, and then from that types to the final one.
5034 We create vector destinations for the intermediate type (TYPES) received
5035 from supportable_*_operation, and store them in the correct order
5036 for future use in vect_create_vectorized_*_stmts (). */
5037 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5038 vec_dest
= vect_create_destination_var (scalar_dest
,
5039 (cvt_type
&& modifier
== WIDEN
)
5040 ? cvt_type
: vectype_out
);
5041 vec_dsts
.quick_push (vec_dest
);
5045 for (i
= interm_types
.length () - 1;
5046 interm_types
.iterate (i
, &intermediate_type
); i
--)
5048 vec_dest
= vect_create_destination_var (scalar_dest
,
5050 vec_dsts
.quick_push (vec_dest
);
5055 vec_dest
= vect_create_destination_var (scalar_dest
,
5057 ? vectype_out
: cvt_type
);
5062 if (modifier
== WIDEN
)
5064 else if (modifier
== NARROW
)
5067 ninputs
= vect_pow2 (multi_step_cvt
);
5075 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5077 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5079 /* Arguments are ready, create the new vector stmt. */
5080 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5081 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5082 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5083 gimple_assign_set_lhs (new_stmt
, new_temp
);
5084 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5087 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5089 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5094 /* In case the vectorization factor (VF) is bigger than the number
5095 of elements that we can fit in a vectype (nunits), we have to
5096 generate more than one vector stmt - i.e - we need to "unroll"
5097 the vector stmt by a factor VF/nunits. */
5098 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5100 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5102 if (code
== WIDEN_LSHIFT_EXPR
)
5104 int oprnds_size
= vec_oprnds0
.length ();
5105 vec_oprnds1
.create (oprnds_size
);
5106 for (i
= 0; i
< oprnds_size
; ++i
)
5107 vec_oprnds1
.quick_push (op1
);
5109 /* Arguments are ready. Create the new vector stmts. */
5110 for (i
= multi_step_cvt
; i
>= 0; i
--)
5112 tree this_dest
= vec_dsts
[i
];
5113 enum tree_code c1
= code1
, c2
= code2
;
5114 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5119 if (known_eq (nunits_out
, nunits_in
))
5120 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5121 &vec_oprnds1
, stmt_info
,
5125 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5126 &vec_oprnds1
, stmt_info
,
5131 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5136 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5137 new_temp
= make_ssa_name (vec_dest
);
5138 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5139 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5142 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5145 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5147 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5152 /* In case the vectorization factor (VF) is bigger than the number
5153 of elements that we can fit in a vectype (nunits), we have to
5154 generate more than one vector stmt - i.e - we need to "unroll"
5155 the vector stmt by a factor VF/nunits. */
5156 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5158 /* Arguments are ready. Create the new vector stmts. */
5160 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5162 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5163 new_temp
= make_ssa_name (vec_dest
);
5165 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5167 vec_oprnds0
[i
] = new_temp
;
5170 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5172 stmt_info
, vec_dsts
, gsi
,
5177 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5179 vec_oprnds0
.release ();
5180 vec_oprnds1
.release ();
5181 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
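/* For example, a cast between "int" and "unsigned int" (same precision)
   satisfies tree_nop_conversion_p and needs no vector code, whereas a
   widening "short" to "int" conversion does not.  */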
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
5226 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5228 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5232 vec
<tree
> vec_oprnds
= vNULL
;
5234 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5235 enum tree_code code
;
5238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5245 /* Is vectorizable assignment? */
5246 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5250 scalar_dest
= gimple_assign_lhs (stmt
);
5251 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5254 if (STMT_VINFO_DATA_REF (stmt_info
))
5257 code
= gimple_assign_rhs_code (stmt
);
5258 if (!(gimple_assign_single_p (stmt
)
5259 || code
== PAREN_EXPR
5260 || CONVERT_EXPR_CODE_P (code
)))
5263 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5264 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5266 /* Multiple types in SLP are handled by creating the appropriate number of
5267 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5272 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5274 gcc_assert (ncopies
>= 1);
5277 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5278 &dt
[0], &vectype_in
))
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5282 "use not simple.\n");
5286 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5288 /* We can handle NOP_EXPR conversions that do not change the number
5289 of elements or the vector size. */
5290 if ((CONVERT_EXPR_CODE_P (code
)
5291 || code
== VIEW_CONVERT_EXPR
)
5293 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5294 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5295 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5298 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5299 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5301 if (dump_enabled_p ())
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5303 "can't convert between boolean and non "
5304 "boolean vectors %T\n", TREE_TYPE (op
));
5309 /* We do not handle bit-precision changes. */
5310 if ((CONVERT_EXPR_CODE_P (code
)
5311 || code
== VIEW_CONVERT_EXPR
)
5312 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5313 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5314 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5315 /* But a conversion that does not change the bit-pattern is ok. */
5316 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5317 > TYPE_PRECISION (TREE_TYPE (op
)))
5318 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5322 "type conversion to/from bit-precision "
5327 if (!vec_stmt
) /* transformation not required. */
5330 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5334 "incompatible vector types for invariants\n");
5337 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5338 DUMP_VECT_SCOPE ("vectorizable_assignment");
5339 if (!vect_nop_conversion_p (stmt_info
))
5340 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5350 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5353 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5355 /* Arguments are ready. create the new vector stmt. */
5356 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5358 if (CONVERT_EXPR_CODE_P (code
)
5359 || code
== VIEW_CONVERT_EXPR
)
5360 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5361 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5362 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5363 gimple_assign_set_lhs (new_stmt
, new_temp
);
5364 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5366 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5368 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5371 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5373 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5385 machine_mode vec_mode
;
5390 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5394 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5396 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5398 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5400 || (optab_handler (optab
, TYPE_MODE (vectype
))
5401 == CODE_FOR_nothing
))
5405 vec_mode
= TYPE_MODE (vectype
);
5406 icode
= (int) optab_handler (optab
, vec_mode
);
5407 if (icode
== CODE_FOR_nothing
)
5414 /* Function vectorizable_shift.
5416 Check if STMT_INFO performs a shift operation that can be vectorized.
5417 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5418 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5419 Return true if STMT_INFO is vectorizable in this way. */
5422 vectorizable_shift (vec_info
*vinfo
,
5423 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5424 gimple
**vec_stmt
, slp_tree slp_node
,
5425 stmt_vector_for_cost
*cost_vec
)
5429 tree op0
, op1
= NULL
;
5430 tree vec_oprnd1
= NULL_TREE
;
5432 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5433 enum tree_code code
;
5434 machine_mode vec_mode
;
5438 machine_mode optab_op2_mode
;
5439 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5441 poly_uint64 nunits_in
;
5442 poly_uint64 nunits_out
;
5447 vec
<tree
> vec_oprnds0
= vNULL
;
5448 vec
<tree
> vec_oprnds1
= vNULL
;
5451 bool scalar_shift_arg
= true;
5452 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5453 bool incompatible_op1_vectype_p
= false;
5455 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5458 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5459 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5463 /* Is STMT a vectorizable binary/unary operation? */
5464 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5468 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5471 code
= gimple_assign_rhs_code (stmt
);
5473 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5474 || code
== RROTATE_EXPR
))
5477 scalar_dest
= gimple_assign_lhs (stmt
);
5478 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5479 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5481 if (dump_enabled_p ())
5482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5483 "bit-precision shifts not supported.\n");
5488 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5489 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5491 if (dump_enabled_p ())
5492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5493 "use not simple.\n");
5496 /* If op0 is an external or constant def, infer the vector type
5497 from the scalar type. */
5499 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5501 gcc_assert (vectype
);
5504 if (dump_enabled_p ())
5505 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5506 "no vectype for scalar type\n");
5510 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5511 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5512 if (maybe_ne (nunits_out
, nunits_in
))
5515 stmt_vec_info op1_def_stmt_info
;
5517 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5518 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5520 if (dump_enabled_p ())
5521 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5522 "use not simple.\n");
5526 /* Multiple types in SLP are handled by creating the appropriate number of
5527 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5532 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5534 gcc_assert (ncopies
>= 1);
5536 /* Determine whether the shift amount is a vector, or scalar. If the
5537 shift/rotate amount is a vector, use the vector/vector shift optabs. */
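  /* E.g. "x[i] << 3", or "x[i] << n" with loop-invariant n, can use the
     vector-by-scalar optab, while "x[i] << y[i]" needs the
     vector-by-vector form.  */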
5539 if ((dt
[1] == vect_internal_def
5540 || dt
[1] == vect_induction_def
5541 || dt
[1] == vect_nested_cycle
)
5543 scalar_shift_arg
= false;
5544 else if (dt
[1] == vect_constant_def
5545 || dt
[1] == vect_external_def
5546 || dt
[1] == vect_internal_def
)
5548 /* In SLP, need to check whether the shift count is the same,
5549 in loops if it is a constant or invariant, it is always
5553 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5554 stmt_vec_info slpstmt_info
;
5556 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5558 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5559 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5560 scalar_shift_arg
= false;
5563 /* For internal SLP defs we have to make sure we see scalar stmts
5564 for all vector elements.
5565 ??? For different vectors we could resort to a different
5566 scalar shift operand but code-generation below simply always
5568 if (dt
[1] == vect_internal_def
5569 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5571 scalar_shift_arg
= false;
5574 /* If the shift amount is computed by a pattern stmt we cannot
5575 use the scalar amount directly thus give up and use a vector
5577 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5578 scalar_shift_arg
= false;
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5584 "operand mode requires invariant argument.\n");
5588 /* Vector shifted by vector. */
5589 bool was_scalar_shift_arg
= scalar_shift_arg
;
5590 if (!scalar_shift_arg
)
5592 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5593 if (dump_enabled_p ())
5594 dump_printf_loc (MSG_NOTE
, vect_location
,
5595 "vector/vector shift/rotate found.\n");
5598 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5600 incompatible_op1_vectype_p
5601 = (op1_vectype
== NULL_TREE
5602 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5603 TYPE_VECTOR_SUBPARTS (vectype
))
5604 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5605 if (incompatible_op1_vectype_p
5607 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5608 || slp_op1
->refcnt
!= 1))
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5612 "unusable type for last operand in"
5613 " vector/vector shift/rotate.\n");
5617 /* See if the machine has a vector shifted by scalar insn and if not
5618 then see if it has a vector shifted by vector insn. */
5621 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5623 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE
, vect_location
,
5627 "vector/scalar shift/rotate found.\n");
5631 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5633 && (optab_handler (optab
, TYPE_MODE (vectype
))
5634 != CODE_FOR_nothing
))
5636 scalar_shift_arg
= false;
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE
, vect_location
,
5640 "vector/vector shift/rotate found.\n");
5643 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5647 /* Unlike the other binary operators, shifts/rotates have
5648 the rhs being int, instead of the same type as the lhs,
5649 so make sure the scalar is the right type if we are
5650 dealing with vectors of long long/long/short/char. */
5651 incompatible_op1_vectype_p
5653 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5655 if (incompatible_op1_vectype_p
5656 && dt
[1] == vect_internal_def
)
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5660 "unusable type for last operand in"
5661 " vector/vector shift/rotate.\n");
5668 /* Supportable by target? */
5671 if (dump_enabled_p ())
5672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5676 vec_mode
= TYPE_MODE (vectype
);
5677 icode
= (int) optab_handler (optab
, vec_mode
);
5678 if (icode
== CODE_FOR_nothing
)
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5682 "op not supported by target.\n");
5685 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5686 if (vect_emulated_vector_p (vectype
))
5689 if (!vec_stmt
) /* transformation not required. */
5692 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5693 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5694 && (!incompatible_op1_vectype_p
5695 || dt
[1] == vect_constant_def
)
5696 && !vect_maybe_update_slp_op_vectype
5698 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5700 if (dump_enabled_p ())
5701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5702 "incompatible vector types for invariants\n");
5705 /* Now adjust the constant shift amount in place. */
5707 && incompatible_op1_vectype_p
5708 && dt
[1] == vect_constant_def
)
5710 for (unsigned i
= 0;
5711 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5713 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5714 = fold_convert (TREE_TYPE (vectype
),
5715 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5716 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5720 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5721 DUMP_VECT_SCOPE ("vectorizable_shift");
5722 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5723 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE
, vect_location
,
5731 "transform binary/unary operation.\n");
5733 if (incompatible_op1_vectype_p
&& !slp_node
)
5735 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5736 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5737 if (dt
[1] != vect_constant_def
)
5738 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5739 TREE_TYPE (vectype
), NULL
);
5743 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5745 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
5747 /* Vector shl and shr insn patterns can be defined with scalar
5748 operand 2 (shift operand). In this case, use constant or loop
5749 invariant op1 directly, without extending it to vector mode
5751 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5752 if (!VECTOR_MODE_P (optab_op2_mode
))
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_NOTE
, vect_location
,
5756 "operand 1 using scalar mode.\n");
5758 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5759 vec_oprnds1
.quick_push (vec_oprnd1
);
5760 /* Store vec_oprnd1 for every vector stmt to be created.
5761 We check during the analysis that all the shift arguments
5763 TODO: Allow different constants for different vector
5764 stmts generated for an SLP instance. */
5766 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5767 vec_oprnds1
.quick_push (vec_oprnd1
);
  else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
    {
      if (was_scalar_shift_arg)
	{
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
	  op1 = fold_convert (TREE_TYPE (vectype), op1);
	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
				  !loop_vinfo ? gsi : NULL);
	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
					 !loop_vinfo ? gsi : NULL);
	  vec_oprnds1.create (slp_node->vec_stmts_size);
	  for (k = 0; k < slp_node->vec_stmts_size; k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
      else if (dt[1] == vect_constant_def)
	/* The constant shift amount has been adjusted in place.  */
	;
      else
	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
    }
  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0,
		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
      if (scalar_shift_arg && dt[1] == vect_internal_def)
	{
	  vop1 = vec_oprnds1[0];
	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
	  gassign *new_stmt
	    = gimple_build_assign (new_temp,
				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
					   vop1,
					   TYPE_SIZE (TREE_TYPE (new_temp)),
					   bitsize_zero_node));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  vop1 = new_temp;
	}
      else
	vop1 = vec_oprnds1[i];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
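
/* Editorial sketch (not part of the original pass): the scalar_shift_arg
   distinction handled above corresponds to the two scalar forms below --
   either every lane is shifted by one invariant amount (a vector shift insn
   with a scalar operand 2 can be used directly), or every lane carries its
   own shift amount (operand 2 must itself be a vector).  Names are
   hypothetical, for illustration only.  */

static void
example_shift_by_scalar (unsigned int *a, unsigned int s, int n)
{
  /* All lanes use the same invariant shift amount.  */
  for (int i = 0; i < n; i++)
    a[i] = a[i] << s;
}

static void
example_shift_by_vector (unsigned int *a, const unsigned int *s, int n)
{
  /* Each lane has its own shift amount.  */
  for (int i = 0; i < n; i++)
    a[i] = a[i] << s[i];
}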
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
5855 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5857 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5858 enum tree_code code
, orig_code
;
5859 machine_mode vec_mode
;
5863 bool target_support_p
;
5864 enum vect_def_type dt
[3]
5865 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5867 poly_uint64 nunits_in
;
5868 poly_uint64 nunits_out
;
5870 int ncopies
, vec_num
;
5872 vec
<tree
> vec_oprnds0
= vNULL
;
5873 vec
<tree
> vec_oprnds1
= vNULL
;
5874 vec
<tree
> vec_oprnds2
= vNULL
;
5875 tree vop0
, vop1
, vop2
;
5876 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5878 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5881 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5885 /* Is STMT a vectorizable binary/unary operation? */
5886 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5890 /* Loads and stores are handled in vectorizable_{load,store}. */
5891 if (STMT_VINFO_DATA_REF (stmt_info
))
5894 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5896 /* Shifts are handled in vectorizable_shift. */
5897 if (code
== LSHIFT_EXPR
5898 || code
== RSHIFT_EXPR
5899 || code
== LROTATE_EXPR
5900 || code
== RROTATE_EXPR
)
5903 /* Comparisons are handled in vectorizable_comparison. */
5904 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5907 /* Conditions are handled in vectorizable_condition. */
5908 if (code
== COND_EXPR
)
5911 /* For pointer addition and subtraction, we should use the normal
5912 plus and minus for the vector operation. */
5913 if (code
== POINTER_PLUS_EXPR
)
5915 if (code
== POINTER_DIFF_EXPR
)
5918 /* Support only unary or binary operations. */
5919 op_type
= TREE_CODE_LENGTH (code
);
5920 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5924 "num. args = %d (not unary/binary/ternary op).\n",
5929 scalar_dest
= gimple_assign_lhs (stmt
);
5930 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5932 /* Most operations cannot handle bit-precision types without extra
5934 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
5936 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5937 /* Exception are bitwise binary operations. */
5938 && code
!= BIT_IOR_EXPR
5939 && code
!= BIT_XOR_EXPR
5940 && code
!= BIT_AND_EXPR
)
5942 if (dump_enabled_p ())
5943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5944 "bit-precision arithmetic not supported.\n");
5949 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5950 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5954 "use not simple.\n");
5957 /* If op0 is an external or constant def, infer the vector type
5958 from the scalar type. */
5961 /* For boolean type we cannot determine vectype by
5962 invariant value (don't know whether it is a vector
5963 of booleans or vector of integers). We use output
5964 vectype because operations on boolean don't change
5966 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5968 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5972 "not supported operation on bool value.\n");
5975 vectype
= vectype_out
;
5978 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
5982 gcc_assert (vectype
);
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5987 "no vectype for scalar type %T\n",
5993 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5994 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5995 if (maybe_ne (nunits_out
, nunits_in
))
5998 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
5999 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6000 if (op_type
== binary_op
|| op_type
== ternary_op
)
6002 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6003 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6005 if (dump_enabled_p ())
6006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6007 "use not simple.\n");
6011 if (op_type
== ternary_op
)
6013 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6014 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6016 if (dump_enabled_p ())
6017 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6018 "use not simple.\n");
6023 /* Multiple types in SLP are handled by creating the appropriate number of
6024 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6029 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6033 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6037 gcc_assert (ncopies
>= 1);
  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }
6054 /* Supportable by target? */
6056 vec_mode
= TYPE_MODE (vectype
);
6057 if (code
== MULT_HIGHPART_EXPR
)
6058 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6061 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6064 if (dump_enabled_p ())
6065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6069 target_support_p
= (optab_handler (optab
, vec_mode
)
6070 != CODE_FOR_nothing
);
  bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
      using_emulated_vectors_p = true;
    }

  if (using_emulated_vectors_p
      && !vect_can_vectorize_without_simd_p (code))
    {
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE, "using word mode not possible.\n");
      return false;
    }
6097 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6098 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6099 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6101 if (!vec_stmt
) /* transformation not required. */
6103 /* If this operation is part of a reduction, a fully-masked loop
6104 should only change the active lanes of the reduction chain,
6105 keeping the inactive lanes as-is. */
6107 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6110 if (cond_fn
== IFN_LAST
6111 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6112 OPTIMIZE_FOR_SPEED
))
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6116 "can't use a fully-masked loop because no"
6117 " conditional operation is available.\n");
6118 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6121 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6125 /* Put types on constant and invariant SLP children. */
6127 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6128 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6129 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6133 "incompatible vector types for invariants\n");
6137 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6138 DUMP_VECT_SCOPE ("vectorizable_operation");
6139 vect_model_simple_cost (vinfo
, stmt_info
,
6140 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6141 if (using_emulated_vectors_p
)
6143 /* The above vect_model_simple_cost call handles constants
6144 in the prologue and (mis-)costs one of the stmts as
6145 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6146 for the actual lowering that will be applied. */
6148 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6162 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6169 if (dump_enabled_p ())
6170 dump_printf_loc (MSG_NOTE
, vect_location
,
6171 "transform binary/unary operation.\n");
6173 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
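
/* Editorial sketch of why the MINUS_EXPR is computed in the unsigned vector
   type and only then VIEW_CONVERTed: subtracting the unsigned
   representations and reinterpreting the bit pattern as signed yields the
   signed difference (assuming the difference fits in the signed type).
   Illustration only, hypothetical name.  */

static long long
example_pointer_diff (unsigned long long a, unsigned long long b)
{
  /* Unsigned subtraction wraps modulo 2^64; the resulting two's-complement
     bit pattern is the signed difference.  */
  return (long long) (a - b);
}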
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
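
/* Editorial sketch of the scalar loop behind the S1/S2 example above, and of
   the ncopies = VF / nunits arithmetic it illustrates: with VF == 16 and
   4-lane vectors, four vector copies are emitted per scalar statement.
   Hypothetical helper names, illustration only.  */

static int
example_num_copies (int vectorization_factor, int nunits)
{
  /* Mirrors the idea behind vect_get_num_copies; the division is assumed
     to be exact.  */
  return vectorization_factor / nunits;
}

static void
example_scalar_loop (const int *x, int *z, int n)
{
  /* S1: x = memref;  S2: z = x + 1;  repeated over the loop.  */
  for (int i = 0; i < n; i++)
    z[i] = x[i] + 1;
}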
6242 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6243 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6244 /* Arguments are ready. Create the new vector stmt. */
6245 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6247 gimple
*new_stmt
= NULL
;
6248 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6249 ? vec_oprnds1
[i
] : NULL_TREE
);
6250 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6251 if (masked_loop_p
&& reduc_idx
>= 0)
6253 /* Perform the operation on active elements only and take
6254 inactive elements from the reduction chain input. */
6256 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6257 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6259 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6261 new_temp
= make_ssa_name (vec_dest
, call
);
6262 gimple_call_set_lhs (call
, new_temp
);
6263 gimple_call_set_nothrow (call
, true);
6264 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6269 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6270 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6271 gimple_assign_set_lhs (new_stmt
, new_temp
);
6272 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6275 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6276 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6278 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6279 gimple_assign_set_lhs (new_stmt
, new_temp
);
6280 vect_finish_stmt_generation (vinfo
, stmt_info
,
6285 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6287 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6291 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6293 vec_oprnds0
.release ();
6294 vec_oprnds1
.release ();
6295 vec_oprnds2
.release ();
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
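
/* Editorial sketch: once the base declaration has been realigned as above,
   checking that a constant byte offset preserves the target alignment is a
   simple mask test (the target alignment is assumed to be a power of two,
   as DR_TARGET_ALIGNMENT is).  Hypothetical helper, illustration only.  */

static int
example_offset_keeps_alignment (unsigned long long offset_bytes,
				unsigned long long target_align_bytes)
{
  return (offset_bytes & (target_align_bytes - 1)) == 0;
}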
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
6365 tree ref
[2] = { ref1
, ref2
};
6366 poly_int64 bitsize
[2], bitpos
[2];
6367 tree offset
[2], base
[2];
6368 for (int i
= 0; i
< 2; ++i
)
6371 int unsignedp
, reversep
, volatilep
= 0;
6372 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6373 &offset
[i
], &mode
, &unsignedp
,
6374 &reversep
, &volatilep
);
6375 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6377 if (TREE_CODE (base
[i
]) == MEM_REF
6378 && offset
[i
] == NULL_TREE
6379 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6381 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6382 if (is_gimple_assign (def_stmt
)
6383 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6384 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6385 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6387 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6389 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6390 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6395 if (!operand_equal_p (base
[0], base
[1], 0))
6397 if (maybe_ne (bitsize
[0], bitsize
[1]))
6399 if (offset
[0] != offset
[1])
6401 if (!offset
[0] || !offset
[1])
6403 if (!operand_equal_p (offset
[0], offset
[1], 0))
6406 for (int i
= 0; i
< 2; ++i
)
6408 step
[i
] = integer_one_node
;
6409 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6411 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6412 if (is_gimple_assign (def_stmt
)
6413 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6414 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6417 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6418 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6421 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6423 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6424 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6426 tree rhs1
= NULL_TREE
;
6427 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6429 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6430 if (gimple_assign_cast_p (def_stmt
))
6431 rhs1
= gimple_assign_rhs1 (def_stmt
);
6433 else if (CONVERT_EXPR_P (offset
[i
]))
6434 rhs1
= TREE_OPERAND (offset
[i
], 0);
6436 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6437 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6438 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6439 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6442 if (!operand_equal_p (offset
[0], offset
[1], 0)
6443 || !operand_equal_p (step
[0], step
[1], 0))
enum scan_store_kind
{
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
   operation to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
6473 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6474 unsigned HOST_WIDE_INT nunits
;
6475 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6477 int units_log2
= exact_log2 (nunits
);
6478 if (units_log2
<= 0)
6482 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6483 for (i
= 0; i
<= units_log2
; ++i
)
6485 unsigned HOST_WIDE_INT j
, k
;
6486 enum scan_store_kind kind
= scan_store_kind_perm
;
6487 vec_perm_builder
sel (nunits
, nunits
, 1);
6488 sel
.quick_grow (nunits
);
6489 if (i
== units_log2
)
6491 for (j
= 0; j
< nunits
; ++j
)
6492 sel
[j
] = nunits
- 1;
6496 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6498 for (k
= 0; j
< nunits
; ++j
, ++k
)
6499 sel
[j
] = nunits
+ k
;
6501 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6502 if (!can_vec_perm_const_p (vec_mode
, indices
))
6504 if (i
== units_log2
)
6507 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6509 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6511 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6512 /* Whole vector shifts shift in zeros, so if init is all zero
6513 constant, there is no need to do anything further. */
6514 if ((TREE_CODE (init
) != INTEGER_CST
6515 && TREE_CODE (init
) != REAL_CST
)
6516 || !initializer_zerop (init
))
6518 tree masktype
= truth_type_for (vectype
);
6519 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6521 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6524 kind
= whole_vector_shift_kind
;
6526 if (use_whole_vector
)
6528 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6529 use_whole_vector
->safe_grow_cleared (i
, true);
6530 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6531 use_whole_vector
->safe_push (kind
);
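
/* Editorial sketch of the scan that the permutations checked above
   implement: an inclusive prefix sum over NUNITS lanes done in
   log2 (NUNITS) steps, where step i adds to every lane the value of the
   lane 2^i positions to its left (lanes shifted in from outside contribute
   the identity, here 0).  Scalar model for illustration only.  */

static void
example_inclusive_scan_steps (int *lanes, unsigned nunits)
{
  /* nunits is assumed to be a power of two, as exact_log2 checks above.
     Walking lane indices downwards makes each step read the values left
     over from the previous step.  */
  for (unsigned step = 1; step < nunits; step <<= 1)
    for (unsigned j = nunits; j-- > step; )
      lanes[j] += lanes[j - step];
}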
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
6548 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6549 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6552 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6555 || memory_access_type
!= VMAT_CONTIGUOUS
6556 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6557 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6558 || loop_vinfo
== NULL
6559 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6560 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6561 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6562 || !integer_zerop (DR_INIT (dr_info
->dr
))
6563 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6564 || !alias_sets_conflict_p (get_alias_set (vectype
),
6565 get_alias_set (TREE_TYPE (ref_type
))))
6567 if (dump_enabled_p ())
6568 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6569 "unsupported OpenMP scan store.\n");
6573 /* We need to pattern match code built by OpenMP lowering and simplified
6574 by following optimizations into something we can handle.
6575 #pragma omp simd reduction(inscan,+:r)
6579 #pragma omp scan inclusive (r)
6582 shall have body with:
6583 // Initialization for input phase, store the reduction initializer:
6584 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6585 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6587 // Actual input phase:
6589 r.0_5 = D.2042[_20];
6592 // Initialization for scan phase:
6593 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6599 // Actual scan phase:
6601 r.1_8 = D.2042[_20];
6603 The "omp simd array" variable D.2042 holds the privatized copy used
6604 inside of the loop and D.2043 is another one that holds copies of
6605 the current original list item. The separate GOMP_SIMD_LANE ifn
6606 kinds are there in order to allow optimizing the initializer store
6607 and combiner sequence, e.g. if it is originally some C++ish user
6608 defined reduction, but allow the vectorizer to pattern recognize it
6609 and turn into the appropriate vectorized scan.
6611 For exclusive scan, this is slightly different:
6612 #pragma omp simd reduction(inscan,+:r)
6616 #pragma omp scan exclusive (r)
6619 shall have body with:
6620 // Initialization for input phase, store the reduction initializer:
6621 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6622 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6624 // Actual input phase:
6626 r.0_5 = D.2042[_20];
6629 // Initialization for scan phase:
6630 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6636 // Actual scan phase:
6638 r.1_8 = D.2044[_20];
6641 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6643 /* Match the D.2042[_21] = 0; store above. Just require that
6644 it is a constant or external definition store. */
6645 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6648 if (dump_enabled_p ())
6649 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6650 "unsupported OpenMP scan initializer store.\n");
6654 if (! loop_vinfo
->scan_map
)
6655 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6656 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6657 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6660 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6662 /* These stores can be vectorized normally. */
6666 if (rhs_dt
!= vect_internal_def
)
6669 if (dump_enabled_p ())
6670 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6671 "unsupported OpenMP scan combiner pattern.\n");
6675 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6676 tree rhs
= gimple_assign_rhs1 (stmt
);
6677 if (TREE_CODE (rhs
) != SSA_NAME
)
6680 gimple
*other_store_stmt
= NULL
;
6681 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6682 bool inscan_var_store
6683 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6685 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6687 if (!inscan_var_store
)
6689 use_operand_p use_p
;
6690 imm_use_iterator iter
;
6691 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6693 gimple
*use_stmt
= USE_STMT (use_p
);
6694 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6696 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6697 || !is_gimple_assign (use_stmt
)
6698 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6700 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6702 other_store_stmt
= use_stmt
;
6704 if (other_store_stmt
== NULL
)
6706 rhs
= gimple_assign_lhs (other_store_stmt
);
6707 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6711 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6713 use_operand_p use_p
;
6714 imm_use_iterator iter
;
6715 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6717 gimple
*use_stmt
= USE_STMT (use_p
);
6718 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6720 if (other_store_stmt
)
6722 other_store_stmt
= use_stmt
;
6728 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6729 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6730 || !is_gimple_assign (def_stmt
)
6731 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6734 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6735 /* For pointer addition, we should use the normal plus for the vector
6739 case POINTER_PLUS_EXPR
:
6742 case MULT_HIGHPART_EXPR
:
6747 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6750 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6751 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6752 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6755 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6756 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6757 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6758 || !gimple_assign_load_p (load1_stmt
)
6759 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6760 || !gimple_assign_load_p (load2_stmt
))
6763 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6764 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6765 if (load1_stmt_info
== NULL
6766 || load2_stmt_info
== NULL
6767 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6768 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6769 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6770 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6773 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6775 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6776 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6777 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6779 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6781 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6785 use_operand_p use_p
;
6786 imm_use_iterator iter
;
6787 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6789 gimple
*use_stmt
= USE_STMT (use_p
);
6790 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6792 if (other_store_stmt
)
6794 other_store_stmt
= use_stmt
;
6798 if (other_store_stmt
== NULL
)
6800 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6801 || !gimple_store_p (other_store_stmt
))
6804 stmt_vec_info other_store_stmt_info
6805 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6806 if (other_store_stmt_info
== NULL
6807 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6808 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6811 gimple
*stmt1
= stmt
;
6812 gimple
*stmt2
= other_store_stmt
;
6813 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6814 std::swap (stmt1
, stmt2
);
6815 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6816 gimple_assign_rhs1 (load2_stmt
)))
6818 std::swap (rhs1
, rhs2
);
6819 std::swap (load1_stmt
, load2_stmt
);
6820 std::swap (load1_stmt_info
, load2_stmt_info
);
6822 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6823 gimple_assign_rhs1 (load1_stmt
)))
6826 tree var3
= NULL_TREE
;
6827 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6828 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6829 gimple_assign_rhs1 (load2_stmt
)))
6831 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6833 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6834 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6835 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6837 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6838 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6839 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6840 || lookup_attribute ("omp simd inscan exclusive",
6841 DECL_ATTRIBUTES (var3
)))
6845 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6846 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6847 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6850 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6851 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6852 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6853 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6854 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6855 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6858 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6859 std::swap (var1
, var2
);
6861 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6863 if (!lookup_attribute ("omp simd inscan exclusive",
6864 DECL_ATTRIBUTES (var1
)))
6869 if (loop_vinfo
->scan_map
== NULL
)
6871 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6875 /* The IL is as expected, now check if we can actually vectorize it.
6882 should be vectorized as (where _40 is the vectorized rhs
6883 from the D.2042[_21] = 0; store):
6884 _30 = MEM <vector(8) int> [(int *)&D.2043];
6885 _31 = MEM <vector(8) int> [(int *)&D.2042];
6886 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6888 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6889 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6891 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6892 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6893 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6895 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6896 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6898 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6899 MEM <vector(8) int> [(int *)&D.2043] = _39;
6900 MEM <vector(8) int> [(int *)&D.2042] = _38;
6907 should be vectorized as (where _40 is the vectorized rhs
6908 from the D.2042[_21] = 0; store):
6909 _30 = MEM <vector(8) int> [(int *)&D.2043];
6910 _31 = MEM <vector(8) int> [(int *)&D.2042];
6911 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6912 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6914 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6915 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6916 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6918 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6919 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6920 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6922 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6923 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6926 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6927 MEM <vector(8) int> [(int *)&D.2044] = _39;
6928 MEM <vector(8) int> [(int *)&D.2042] = _51; */
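
/* Editorial sketch of the scalar semantics the two IL shapes above encode:
   an inclusive scan stores the running total including the current element,
   an exclusive scan stores the total of the elements before it.
   Hypothetical helper names, illustration only.  */

static void
example_inclusive_scan (const int *in, int *out, int n)
{
  int r = 0;
  for (int i = 0; i < n; i++)
    {
      r += in[i];
      out[i] = r;	/* out[i] includes in[i].  */
    }
}

static void
example_exclusive_scan (const int *in, int *out, int n)
{
  int r = 0;
  for (int i = 0; i < n; i++)
    {
      out[i] = r;	/* out[i] excludes in[i].  */
      r += in[i];
    }
}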
6929 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6930 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6931 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
6934 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
6935 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
6952 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6953 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6954 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
6955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6957 if (dump_enabled_p ())
6958 dump_printf_loc (MSG_NOTE
, vect_location
,
6959 "transform scan store. ncopies = %d\n", ncopies
);
6961 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6962 tree rhs
= gimple_assign_rhs1 (stmt
);
6963 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
6965 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6966 bool inscan_var_store
6967 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6969 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6971 use_operand_p use_p
;
6972 imm_use_iterator iter
;
6973 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6975 gimple
*use_stmt
= USE_STMT (use_p
);
6976 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6978 rhs
= gimple_assign_lhs (use_stmt
);
6983 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6984 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6985 if (code
== POINTER_PLUS_EXPR
)
6987 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
6988 && commutative_tree_code (code
));
6989 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6990 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6991 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
6992 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6993 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6994 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6995 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6996 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6997 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6998 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6999 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7001 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7003 std::swap (rhs1
, rhs2
);
7004 std::swap (var1
, var2
);
7005 std::swap (load1_dr_info
, load2_dr_info
);
7008 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7011 unsigned HOST_WIDE_INT nunits
;
7012 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7014 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7015 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7016 gcc_assert (units_log2
> 0);
7017 auto_vec
<tree
, 16> perms
;
7018 perms
.quick_grow (units_log2
+ 1);
7019 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7020 for (int i
= 0; i
<= units_log2
; ++i
)
7022 unsigned HOST_WIDE_INT j
, k
;
7023 vec_perm_builder
sel (nunits
, nunits
, 1);
7024 sel
.quick_grow (nunits
);
7025 if (i
== units_log2
)
7026 for (j
= 0; j
< nunits
; ++j
)
7027 sel
[j
] = nunits
- 1;
7030 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7032 for (k
= 0; j
< nunits
; ++j
, ++k
)
7033 sel
[j
] = nunits
+ k
;
7035 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7036 if (!use_whole_vector
.is_empty ()
7037 && use_whole_vector
[i
] != scan_store_kind_perm
)
7039 if (zero_vec
== NULL_TREE
)
7040 zero_vec
= build_zero_cst (vectype
);
7041 if (masktype
== NULL_TREE
7042 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7043 masktype
= truth_type_for (vectype
);
7044 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7047 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7050 tree vec_oprnd1
= NULL_TREE
;
7051 tree vec_oprnd2
= NULL_TREE
;
7052 tree vec_oprnd3
= NULL_TREE
;
7053 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7054 tree dataref_offset
= build_int_cst (ref_type
, 0);
7055 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7056 vectype
, VMAT_CONTIGUOUS
);
7057 tree ldataref_ptr
= NULL_TREE
;
7058 tree orig
= NULL_TREE
;
7059 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7060 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7061 auto_vec
<tree
> vec_oprnds1
;
7062 auto_vec
<tree
> vec_oprnds2
;
7063 auto_vec
<tree
> vec_oprnds3
;
7064 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7065 *init
, &vec_oprnds1
,
7066 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7067 rhs2
, &vec_oprnds3
);
7068 for (int j
= 0; j
< ncopies
; j
++)
7070 vec_oprnd1
= vec_oprnds1
[j
];
7071 if (ldataref_ptr
== NULL
)
7072 vec_oprnd2
= vec_oprnds2
[j
];
7073 vec_oprnd3
= vec_oprnds3
[j
];
7076 else if (!inscan_var_store
)
7077 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7081 vec_oprnd2
= make_ssa_name (vectype
);
7082 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7083 unshare_expr (ldataref_ptr
),
7085 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7086 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7087 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7088 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7089 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7092 tree v
= vec_oprnd2
;
7093 for (int i
= 0; i
< units_log2
; ++i
)
7095 tree new_temp
= make_ssa_name (vectype
);
7096 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7098 && (use_whole_vector
[i
]
7099 != scan_store_kind_perm
))
7100 ? zero_vec
: vec_oprnd1
, v
,
7102 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7103 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7104 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7106 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7108 /* Whole vector shift shifted in zero bits, but if *init
7109 is not initializer_zerop, we need to replace those elements
7110 with elements from vec_oprnd1. */
7111 tree_vector_builder
vb (masktype
, nunits
, 1);
7112 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7113 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7114 ? boolean_false_node
: boolean_true_node
);
7116 tree new_temp2
= make_ssa_name (vectype
);
7117 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7118 new_temp
, vec_oprnd1
);
7119 vect_finish_stmt_generation (vinfo
, stmt_info
,
7121 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7122 new_temp
= new_temp2
;
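
/* Editorial sketch of the VEC_COND_EXPR mask built just above for step I:
   the first (1 << I) lanes received shifted-in zeros and must be replaced
   by the initializer lanes, the remaining lanes keep the shifted value.
   Hypothetical helper, illustration only.  */

static void
example_step_mask (unsigned char *mask, unsigned nunits, unsigned step)
{
  for (unsigned k = 0; k < nunits; ++k)
    /* 0 selects the initializer lane, 1 keeps the shifted lane, matching
       the boolean_false_node / boolean_true_node choice above.  */
    mask[k] = (k < (1u << step)) ? 0 : 1;
}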
7125 /* For exclusive scan, perform the perms[i] permutation once
7128 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7136 tree new_temp2
= make_ssa_name (vectype
);
7137 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7138 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7139 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7144 tree new_temp
= make_ssa_name (vectype
);
7145 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7146 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7147 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7149 tree last_perm_arg
= new_temp
;
7150 /* For exclusive scan, new_temp computed above is the exclusive scan
7151 prefix sum. Turn it into inclusive prefix sum for the broadcast
7152 of the last element into orig. */
7153 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7155 last_perm_arg
= make_ssa_name (vectype
);
7156 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7157 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7158 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7161 orig
= make_ssa_name (vectype
);
7162 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7163 last_perm_arg
, perms
[units_log2
]);
7164 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7165 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7167 if (!inscan_var_store
)
7169 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7170 unshare_expr (dataref_ptr
),
7172 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7173 g
= gimple_build_assign (data_ref
, new_temp
);
7174 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7175 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7179 if (inscan_var_store
)
7180 for (int j
= 0; j
< ncopies
; j
++)
7183 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7185 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7186 unshare_expr (dataref_ptr
),
7188 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7189 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7190 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7191 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
7213 tree vec_oprnd
= NULL_TREE
;
7215 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7216 class loop
*loop
= NULL
;
7217 machine_mode vec_mode
;
7219 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7220 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7221 tree dataref_ptr
= NULL_TREE
;
7222 tree dataref_offset
= NULL_TREE
;
7223 gimple
*ptr_incr
= NULL
;
7226 stmt_vec_info first_stmt_info
;
7228 unsigned int group_size
, i
;
7229 vec
<tree
> oprnds
= vNULL
;
7230 vec
<tree
> result_chain
= vNULL
;
7231 tree offset
= NULL_TREE
;
7232 vec
<tree
> vec_oprnds
= vNULL
;
7233 bool slp
= (slp_node
!= NULL
);
7234 unsigned int vec_num
;
7235 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7237 gather_scatter_info gs_info
;
7239 vec_load_store_type vls_type
;
7242 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7245 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7249 /* Is vectorizable store? */
7251 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7252 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7254 tree scalar_dest
= gimple_assign_lhs (assign
);
7255 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7256 && is_pattern_stmt_p (stmt_info
))
7257 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7258 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7259 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7260 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7261 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7262 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7263 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7264 && TREE_CODE (scalar_dest
) != MEM_REF
)
7269 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7270 if (!call
|| !gimple_call_internal_p (call
))
7273 internal_fn ifn
= gimple_call_internal_fn (call
);
7274 if (!internal_store_fn_p (ifn
))
7277 if (slp_node
!= NULL
)
7279 if (dump_enabled_p ())
7280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7281 "SLP of masked stores not supported.\n");
7285 int mask_index
= internal_fn_mask_index (ifn
);
7287 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7288 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7292 op
= vect_get_store_rhs (stmt_info
);
7294 /* Cannot have hybrid store SLP -- that would mean storing to the
7295 same location twice. */
7296 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7298 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7299 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7303 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7304 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7309 /* Multiple types in SLP are handled by creating the appropriate number of
7310 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7315 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7317 gcc_assert (ncopies
>= 1);
7319 /* FORNOW. This restriction should be relaxed. */
7320 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7324 "multiple types in nested loop.\n");
7328 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7329 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7332 elem_type
= TREE_TYPE (vectype
);
7333 vec_mode
= TYPE_MODE (vectype
);
7335 if (!STMT_VINFO_DATA_REF (stmt_info
))
7338 vect_memory_access_type memory_access_type
;
7339 enum dr_alignment_support alignment_support_scheme
;
7340 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7341 ncopies
, &memory_access_type
,
7342 &alignment_support_scheme
, &gs_info
))
7347 if (memory_access_type
== VMAT_CONTIGUOUS
)
7349 if (!VECTOR_MODE_P (vec_mode
)
7350 || !can_vec_mask_load_store_p (vec_mode
,
7351 TYPE_MODE (mask_vectype
), false))
7354 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7355 && (memory_access_type
!= VMAT_GATHER_SCATTER
7356 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7358 if (dump_enabled_p ())
7359 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7360 "unsupported access type for masked store.\n");
7366 /* FORNOW. In some cases can vectorize even if data-type not supported
7367 (e.g. - array initialization with 0). */
7368 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7372 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7373 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7374 && memory_access_type
!= VMAT_GATHER_SCATTER
7375 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7378 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7379 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7380 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7384 first_stmt_info
= stmt_info
;
7385 first_dr_info
= dr_info
;
7386 group_size
= vec_num
= 1;
7389 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7391 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7392 memory_access_type
))
7396 if (!vec_stmt
) /* transformation not required. */
7398 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7401 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7402 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7403 group_size
, memory_access_type
,
7407 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7410 if (dump_enabled_p ())
7411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7412 "incompatible vector types for invariants\n");
7416 if (dump_enabled_p ()
7417 && memory_access_type
!= VMAT_ELEMENTWISE
7418 && memory_access_type
!= VMAT_GATHER_SCATTER
7419 && alignment_support_scheme
!= dr_aligned
)
7420 dump_printf_loc (MSG_NOTE
, vect_location
,
7421 "Vectorizing an unaligned access.\n");
7423 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7424 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7425 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7428 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7432 ensure_base_align (dr_info
);
7434 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7436 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7437 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7438 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7439 tree ptr
, var
, scale
, vec_mask
;
7440 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7441 tree mask_halfvectype
= mask_vectype
;
7442 edge pe
= loop_preheader_edge (loop
);
7445 enum { NARROW
, NONE
, WIDEN
} modifier
;
7446 poly_uint64 scatter_off_nunits
7447 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7449 if (known_eq (nunits
, scatter_off_nunits
))
7451 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7455 /* Currently gathers and scatters are only supported for
7456 fixed-length vectors. */
7457 unsigned int count
= scatter_off_nunits
.to_constant ();
7458 vec_perm_builder
sel (count
, count
, 1);
7459 for (i
= 0; i
< (unsigned int) count
; ++i
)
7460 sel
.quick_push (i
| (count
/ 2));
7462 vec_perm_indices
indices (sel
, 1, count
);
7463 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7465 gcc_assert (perm_mask
!= NULL_TREE
);
7467 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7471 /* Currently gathers and scatters are only supported for
7472 fixed-length vectors. */
7473 unsigned int count
= nunits
.to_constant ();
7474 vec_perm_builder
sel (count
, count
, 1);
7475 for (i
= 0; i
< (unsigned int) count
; ++i
)
7476 sel
.quick_push (i
| (count
/ 2));
7478 vec_perm_indices
indices (sel
, 2, count
);
7479 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7480 gcc_assert (perm_mask
!= NULL_TREE
);
7484 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7489 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7490 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7491 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7492 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7493 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7494 scaletype
= TREE_VALUE (arglist
);
7496 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7497 && TREE_CODE (rettype
) == VOID_TYPE
);
7499 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7500 if (!is_gimple_min_invariant (ptr
))
7502 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7503 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7504 gcc_assert (!new_bb
);
7507 if (mask
== NULL_TREE
)
7509 mask_arg
= build_int_cst (masktype
, -1);
7510 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7511 mask_arg
, masktype
, NULL
);
7514 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7516 auto_vec
<tree
> vec_oprnds0
;
7517 auto_vec
<tree
> vec_oprnds1
;
7518 auto_vec
<tree
> vec_masks
;
7521 tree mask_vectype
= truth_type_for (vectype
);
7522 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7524 ? ncopies
/ 2 : ncopies
,
7525 mask
, &vec_masks
, mask_vectype
);
7527 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7529 ? ncopies
/ 2 : ncopies
,
7530 gs_info
.offset
, &vec_oprnds0
);
7531 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7533 ? ncopies
/ 2 : ncopies
,
7535 for (j
= 0; j
< ncopies
; ++j
)
7537 if (modifier
== WIDEN
)
7540 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7541 perm_mask
, stmt_info
, gsi
);
7543 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7544 src
= vec_oprnd1
= vec_oprnds1
[j
];
7546 mask_op
= vec_mask
= vec_masks
[j
];
7548 else if (modifier
== NARROW
)
7551 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7552 perm_mask
, stmt_info
, gsi
);
7554 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7555 op
= vec_oprnd0
= vec_oprnds0
[j
];
7557 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7561 op
= vec_oprnd0
= vec_oprnds0
[j
];
7562 src
= vec_oprnd1
= vec_oprnds1
[j
];
7564 mask_op
= vec_mask
= vec_masks
[j
];
7567 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7569 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7570 TYPE_VECTOR_SUBPARTS (srctype
)));
7571 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7572 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7574 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7575 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7579 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7581 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7582 TYPE_VECTOR_SUBPARTS (idxtype
)));
7583 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7584 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7586 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7587 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7595 if (modifier
== NARROW
)
7597 var
= vect_get_new_ssa_name (mask_halfvectype
,
7600 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7601 : VEC_UNPACK_LO_EXPR
,
7603 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7606 tree optype
= TREE_TYPE (mask_arg
);
7607 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7610 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7611 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7612 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7614 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7615 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7617 if (!useless_type_conversion_p (masktype
, utype
))
7619 gcc_assert (TYPE_PRECISION (utype
)
7620 <= TYPE_PRECISION (masktype
));
7621 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7622 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7623 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7629 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7630 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7632 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7634 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7637 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7638 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7640 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7641 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7646 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7648 /* We vectorize all the stmts of the interleaving group when we
7649 reach the last stmt in the group. */
7650 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7651 < DR_GROUP_SIZE (first_stmt_info
)
7660 grouped_store
= false;
7661 /* VEC_NUM is the number of vect stmts to be created for this
7663 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7664 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7665 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7666 == first_stmt_info
);
7667 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7668 op
= vect_get_store_rhs (first_stmt_info
);
7671 /* VEC_NUM is the number of vect stmts to be created for this
7673 vec_num
= group_size
;
7675 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7678 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7680 if (dump_enabled_p ())
7681 dump_printf_loc (MSG_NOTE
, vect_location
,
7682 "transform store. ncopies = %d\n", ncopies
);
7684 if (memory_access_type
== VMAT_ELEMENTWISE
7685 || memory_access_type
== VMAT_STRIDED_SLP
)
7687 gimple_stmt_iterator incr_gsi
;
7693 tree stride_base
, stride_step
, alias_off
;
7697 /* Checked by get_load_store_type. */
7698 unsigned int const_nunits
= nunits
.to_constant ();
7700 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7701 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7703 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7705 = fold_build_pointer_plus
7706 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7707 size_binop (PLUS_EXPR
,
7708 convert_to_ptrofftype (dr_offset
),
7709 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7710 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     tmp2 = vectemp[1];
	     ...
	     array[j] = tmp1;
	     array[j + stride] = tmp2;
	     ...
	 */
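      /* A concrete (illustrative) instance of the above: with a scalar
	 stride of 3, V4SI vectors and VF == 4, each vectorized iteration
	 extracts the four lanes of the rhs vector and emits

	   array[j]     = vectemp[0];
	   array[j + 3] = vectemp[1];
	   array[j + 6] = vectemp[2];
	   array[j + 9] = vectemp[3];

	 with J advanced by 4*3 elements per iteration.  The numbers are
	 made up for exposition only.  */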
      unsigned nstores = const_nunits;
      unsigned lnel = 1;
      tree ltype = elem_type;
      tree lvectype = vectype;
7736 if (group_size
< const_nunits
7737 && const_nunits
% group_size
== 0)
7739 nstores
= const_nunits
/ group_size
;
7741 ltype
= build_vector_type (elem_type
, group_size
);
7744 /* First check if vec_extract optab doesn't support extraction
7745 of vector elts directly. */
7746 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7748 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7749 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7750 group_size
).exists (&vmode
)
7751 || (convert_optab_handler (vec_extract_optab
,
7752 TYPE_MODE (vectype
), vmode
)
7753 == CODE_FOR_nothing
))
	      /* Try to avoid emitting an extract of vector elements
		 by performing the extracts using an integer type of the
		 same size, extracting from a vector of those and then
		 re-interpreting it as the original vector type if
		 supported.  */
	      unsigned lsize = group_size * GET_MODE_BITSIZE (elmode);
7762 unsigned int lnunits
= const_nunits
/ group_size
;
7763 /* If we can't construct such a vector fall back to
7764 element extracts from the original vector type and
7765 element size stores. */
7766 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7767 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7768 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7769 lnunits
).exists (&vmode
)
7770 && (convert_optab_handler (vec_extract_optab
,
7772 != CODE_FOR_nothing
))
7776 ltype
= build_nonstandard_integer_type (lsize
, 1);
7777 lvectype
= build_vector_type (ltype
, nstores
);
7779 /* Else fall back to vector extraction anyway.
7780 Fewer stores are more important than avoiding spilling
7781 of the vector we extract from. Compared to the
7782 construction case in vectorizable_load no store-forwarding
7783 issue exists here for reasonable archs. */
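		  /* Worked example (hypothetical types and counts): storing
		     groups of two floats out of a V8SF vector.  Instead of
		     eight SFmode BIT_FIELD_REF extracts and stores we view
		     the vector as V4DI and emit four 64-bit stores:

		       vect_pun = VIEW_CONVERT_EXPR<vector(4) long>(vect_x);
		       MEM[p]          = BIT_FIELD_REF <vect_pun, 64, 0>;
		       MEM[p + stride] = BIT_FIELD_REF <vect_pun, 64, 64>;
		       ...

		     halving the number of stores.  */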
7786 else if (group_size
>= const_nunits
7787 && group_size
% const_nunits
== 0)
7790 lnel
= const_nunits
;
7794 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7795 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7798 ivstep
= stride_step
;
7799 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7800 build_int_cst (TREE_TYPE (ivstep
), vf
));
7802 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7804 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7805 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7806 create_iv (stride_base
, ivstep
, NULL
,
7807 loop
, &incr_gsi
, insert_after
,
7809 incr
= gsi_stmt (incr_gsi
);
7811 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7813 alias_off
= build_int_cst (ref_type
, 0);
7814 stmt_vec_info next_stmt_info
= first_stmt_info
;
7815 for (g
= 0; g
< group_size
; g
++)
7817 running_off
= offvar
;
7820 tree size
= TYPE_SIZE_UNIT (ltype
);
7821 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7823 tree newoff
= copy_ssa_name (running_off
, NULL
);
7824 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7826 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7827 running_off
= newoff
;
7830 op
= vect_get_store_rhs (next_stmt_info
);
7831 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7833 unsigned int group_el
= 0;
7834 unsigned HOST_WIDE_INT
7835 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7836 for (j
= 0; j
< ncopies
; j
++)
7838 vec_oprnd
= vec_oprnds
[j
];
7839 /* Pun the vector to extract from if necessary. */
7840 if (lvectype
!= vectype
)
7842 tree tem
= make_ssa_name (lvectype
);
7844 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7845 lvectype
, vec_oprnd
));
7846 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7849 for (i
= 0; i
< nstores
; i
++)
7851 tree newref
, newoff
;
7852 gimple
*incr
, *assign
;
7853 tree size
= TYPE_SIZE (ltype
);
7854 /* Extract the i'th component. */
7855 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7856 bitsize_int (i
), size
);
7857 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7860 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7864 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7866 newref
= build2 (MEM_REF
, ltype
,
7867 running_off
, this_off
);
7868 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7870 /* And store it to *running_off. */
7871 assign
= gimple_build_assign (newref
, elem
);
7872 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7876 || group_el
== group_size
)
7878 newoff
= copy_ssa_name (running_off
, NULL
);
7879 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7880 running_off
, stride_step
);
7881 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7883 running_off
= newoff
;
7886 if (g
== group_size
- 1
7889 if (j
== 0 && i
== 0)
7891 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7895 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7896 vec_oprnds
.release ();
7904 auto_vec
<tree
> dr_chain (group_size
);
7905 oprnds
.create (group_size
);
7907 /* Gather-scatter accesses perform only component accesses, alignment
7908 is irrelevant for them. */
7909 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7910 alignment_support_scheme
= dr_unaligned_supported
;
7912 alignment_support_scheme
7913 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
, false);
7915 gcc_assert (alignment_support_scheme
);
7916 vec_loop_masks
*loop_masks
7917 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7918 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7920 vec_loop_lens
*loop_lens
7921 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
7922 ? &LOOP_VINFO_LENS (loop_vinfo
)
7925 /* Shouldn't go with length-based approach if fully masked. */
7926 gcc_assert (!loop_lens
|| !loop_masks
);
7928 /* Targets with store-lane instructions must not require explicit
7929 realignment. vect_supportable_dr_alignment always returns either
7930 dr_aligned or dr_unaligned_supported for masked operations. */
7931 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7934 || alignment_support_scheme
== dr_aligned
7935 || alignment_support_scheme
== dr_unaligned_supported
);
7937 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7938 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7939 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7942 tree vec_offset
= NULL_TREE
;
7943 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7945 aggr_type
= NULL_TREE
;
7948 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7950 aggr_type
= elem_type
;
7951 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7952 &bump
, &vec_offset
);
7956 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7957 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7959 aggr_type
= vectype
;
7960 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
7961 memory_access_type
);
7965 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
7967 /* In case the vectorization factor (VF) is bigger than the number
7968 of elements that we can fit in a vectype (nunits), we have to generate
7969 more than one vector stmt - i.e - we need to "unroll" the
7970 vector stmt by a factor VF/nunits. */
7972 /* In case of interleaving (non-unit grouped access):
7979 We create vectorized stores starting from base address (the access of the
7980 first stmt in the chain (S2 in the above example), when the last store stmt
7981 of the chain (S4) is reached:
7984 VS2: &base + vec_size*1 = vx0
7985 VS3: &base + vec_size*2 = vx1
7986 VS4: &base + vec_size*3 = vx3
7988 Then permutation statements are generated:
7990 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7991 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7994 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7995 (the order of the data-refs in the output of vect_permute_store_chain
7996 corresponds to the order of scalar stmts in the interleaving chain - see
7997 the documentation of vect_permute_store_chain()).
7999 In case of both multiple types and interleaving, above vector stores and
8000 permutation stmts are created for every copy. The result vector stmts are
8001 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8002 STMT_VINFO_RELATED_STMT for the next copies.
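     As a smaller illustration (values invented for exposition):
     interleaving two V4SI chains va = {a0,a1,a2,a3} and vb = {b0,b1,b2,b3}
     uses two permutes

       VS5: vx5 = VEC_PERM_EXPR < va, vb, {0, 4, 1, 5} >
       VS6: vx6 = VEC_PERM_EXPR < va, vb, {2, 6, 3, 7} >

     producing {a0,b0,a1,b1} and {a2,b2,a3,b3}, i.e. the order in which the
     scalar stores touch memory.  */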
8005 auto_vec
<tree
> vec_masks
;
8006 tree vec_mask
= NULL
;
8007 auto_vec
<tree
> vec_offsets
;
8008 auto_vec
<vec
<tree
> > gvec_oprnds
;
8009 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8010 for (j
= 0; j
< ncopies
; j
++)
8017 /* Get vectorized arguments for SLP_NODE. */
8018 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8020 vec_oprnd
= vec_oprnds
[0];
8024 /* For interleaved stores we collect vectorized defs for all the
8025 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8026 used as an input to vect_permute_store_chain().
8028 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8029 and OPRNDS are of size 1. */
8030 stmt_vec_info next_stmt_info
= first_stmt_info
;
8031 for (i
= 0; i
< group_size
; i
++)
8033 /* Since gaps are not supported for interleaved stores,
8034 DR_GROUP_SIZE is the exact number of stmts in the chain.
8035 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8036 that there is no interleaving, DR_GROUP_SIZE is 1,
8037 and only one iteration of the loop will be executed. */
8038 op
= vect_get_store_rhs (next_stmt_info
);
8039 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8040 ncopies
, op
, &gvec_oprnds
[i
]);
8041 vec_oprnd
= gvec_oprnds
[i
][0];
8042 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8043 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8044 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8048 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8049 mask
, &vec_masks
, mask_vectype
);
8050 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
8057 bool simd_lane_access_p
8058 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8059 if (simd_lane_access_p
8061 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8062 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8063 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8064 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8065 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8066 get_alias_set (TREE_TYPE (ref_type
))))
8068 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8069 dataref_offset
= build_int_cst (ref_type
, 0);
8071 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8073 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8074 &gs_info
, &dataref_ptr
,
8076 vec_offset
= vec_offsets
[0];
8080 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8081 simd_lane_access_p
? loop
: NULL
,
8082 offset
, &dummy
, gsi
, &ptr_incr
,
8083 simd_lane_access_p
, NULL_TREE
, bump
);
8087 /* For interleaved stores we created vectorized defs for all the
8088 defs stored in OPRNDS in the previous iteration (previous copy).
8089 DR_CHAIN is then used as an input to vect_permute_store_chain().
8090 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8091 OPRNDS are of size 1. */
8092 for (i
= 0; i
< group_size
; i
++)
8094 vec_oprnd
= gvec_oprnds
[i
][j
];
8095 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8096 oprnds
[i
] = gvec_oprnds
[i
][j
];
8099 vec_mask
= vec_masks
[j
];
8102 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8103 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8104 vec_offset
= vec_offsets
[j
];
8106 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8110 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8114 /* Get an array into which we can store the individual vectors. */
8115 vec_array
= create_vector_array (vectype
, vec_num
);
8117 /* Invalidate the current contents of VEC_ARRAY. This should
8118 become an RTL clobber too, which prevents the vector registers
8119 from being upward-exposed. */
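	  /* In the GIMPLE dump the clobber emitted here appears as
	     (illustrative only):

	       vect_array ={v} {CLOBBER};

	     marking VEC_ARRAY as having no live contents before the
	     individual vectors are written into it.  */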
8120 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8122 /* Store the individual vectors into the array. */
8123 for (i
= 0; i
< vec_num
; i
++)
8125 vec_oprnd
= dr_chain
[i
];
8126 write_vector_array (vinfo
, stmt_info
,
8127 gsi
, vec_oprnd
, vec_array
, i
);
8130 tree final_mask
= NULL
;
8132 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8135 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8142 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8144 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8145 tree alias_ptr
= build_int_cst (ref_type
, align
);
8146 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8147 dataref_ptr
, alias_ptr
,
8148 final_mask
, vec_array
);
8153 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8154 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8155 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8157 gimple_call_set_lhs (call
, data_ref
);
8159 gimple_call_set_nothrow (call
, true);
8160 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8163 /* Record that VEC_ARRAY is now dead. */
8164 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8172 result_chain
.create (group_size
);
8174 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8175 gsi
, &result_chain
);
8178 stmt_vec_info next_stmt_info
= first_stmt_info
;
8179 for (i
= 0; i
< vec_num
; i
++)
8182 unsigned HOST_WIDE_INT align
;
8184 tree final_mask
= NULL_TREE
;
8186 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8188 vectype
, vec_num
* j
+ i
);
8190 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8193 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8195 tree scale
= size_int (gs_info
.scale
);
8198 call
= gimple_build_call_internal
8199 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8200 scale
, vec_oprnd
, final_mask
);
8202 call
= gimple_build_call_internal
8203 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8205 gimple_call_set_nothrow (call
, true);
8206 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8212 /* Bump the vector pointer. */
8213 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8214 gsi
, stmt_info
, bump
);
8217 vec_oprnd
= vec_oprnds
[i
];
8218 else if (grouped_store
)
8219 /* For grouped stores vectorized defs are interleaved in
8220 vect_permute_store_chain(). */
8221 vec_oprnd
= result_chain
[i
];
8223 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8224 if (aligned_access_p (first_dr_info
, vectype
))
8226 else if (dr_misalignment (first_dr_info
, vectype
)
8227 == DR_MISALIGNMENT_UNKNOWN
)
8229 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8233 misalign
= dr_misalignment (first_dr_info
, vectype
);
8234 if (dataref_offset
== NULL_TREE
8235 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8236 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8238 align
= least_bit_hwi (misalign
| align
);
8240 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8242 tree perm_mask
= perm_mask_for_reverse (vectype
);
8243 tree perm_dest
= vect_create_destination_var
8244 (vect_get_store_rhs (stmt_info
), vectype
);
8245 tree new_temp
= make_ssa_name (perm_dest
);
8247 /* Generate the permute statement. */
8249 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8250 vec_oprnd
, perm_mask
);
8251 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8253 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8254 vec_oprnd
= new_temp
;
8257 /* Arguments are ready. Create the new vector stmt. */
8260 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8262 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8264 final_mask
, vec_oprnd
);
8265 gimple_call_set_nothrow (call
, true);
8266 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8272 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8273 vec_num
* ncopies
, vec_num
* j
+ i
);
8274 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8275 machine_mode vmode
= TYPE_MODE (vectype
);
8276 opt_machine_mode new_ovmode
8277 = get_len_load_store_mode (vmode
, false);
8278 machine_mode new_vmode
= new_ovmode
.require ();
8279 /* Need conversion if it's wrapped with VnQI. */
8280 if (vmode
!= new_vmode
)
8283 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8286 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8288 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8290 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8292 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8297 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8298 ptr
, final_len
, vec_oprnd
);
8299 gimple_call_set_nothrow (call
, true);
8300 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8305 data_ref
= fold_build2 (MEM_REF
, vectype
,
8309 : build_int_cst (ref_type
, 0));
8310 if (aligned_access_p (first_dr_info
, vectype
))
8313 TREE_TYPE (data_ref
)
8314 = build_aligned_type (TREE_TYPE (data_ref
),
8315 align
* BITS_PER_UNIT
);
8316 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8317 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8318 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8324 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8325 if (!next_stmt_info
)
8332 *vec_stmt
= new_stmt
;
8333 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8337 for (i
= 0; i
< group_size
; ++i
)
8339 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8343 result_chain
.release ();
8344 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}
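/* Illustrative usage sketch (not taken from an actual caller; real callers
   such as perm_mask_for_reverse use a stepped encoding instead of pushing
   every index): building a lane-reversal selector for a 4-lane vector type
   and turning it into a VEC_PERM_EXPR mask:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK is a VECTOR_CST of ssizetype elements, usable as the third operand
   of a VEC_PERM_EXPR.  */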
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern for arbitrary input vectors.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
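/* Example (hypothetical GIMPLE, invented SSA names): for an invariant
   load in the loop body

     loop:
       _1 = &a[off_7];
       x_2 = *_1;

   hoist_defs_of_uses moves the definition of _1 to the loop preheader;
   the VMAT_INVARIANT handling in vectorizable_load can then emit the
   load itself on the preheader edge as well.  */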
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
8468 vectorizable_load (vec_info
*vinfo
,
8469 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8470 gimple
**vec_stmt
, slp_tree slp_node
,
8471 stmt_vector_for_cost
*cost_vec
)
8474 tree vec_dest
= NULL
;
8475 tree data_ref
= NULL
;
8476 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8477 class loop
*loop
= NULL
;
8478 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8479 bool nested_in_vect_loop
= false;
8484 tree dataref_ptr
= NULL_TREE
;
8485 tree dataref_offset
= NULL_TREE
;
8486 gimple
*ptr_incr
= NULL
;
8489 unsigned int group_size
;
8490 poly_uint64 group_gap_adj
;
8491 tree msq
= NULL_TREE
, lsq
;
8492 tree offset
= NULL_TREE
;
8493 tree byte_offset
= NULL_TREE
;
8494 tree realignment_token
= NULL_TREE
;
8496 vec
<tree
> dr_chain
= vNULL
;
8497 bool grouped_load
= false;
8498 stmt_vec_info first_stmt_info
;
8499 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8500 bool compute_in_loop
= false;
8501 class loop
*at_loop
;
8503 bool slp
= (slp_node
!= NULL
);
8504 bool slp_perm
= false;
8505 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8508 gather_scatter_info gs_info
;
8510 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8512 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8515 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8519 if (!STMT_VINFO_DATA_REF (stmt_info
))
8522 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8523 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8525 scalar_dest
= gimple_assign_lhs (assign
);
8526 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8529 tree_code code
= gimple_assign_rhs_code (assign
);
8530 if (code
!= ARRAY_REF
8531 && code
!= BIT_FIELD_REF
8532 && code
!= INDIRECT_REF
8533 && code
!= COMPONENT_REF
8534 && code
!= IMAGPART_EXPR
8535 && code
!= REALPART_EXPR
8537 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8542 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8543 if (!call
|| !gimple_call_internal_p (call
))
8546 internal_fn ifn
= gimple_call_internal_fn (call
);
8547 if (!internal_load_fn_p (ifn
))
8550 scalar_dest
= gimple_call_lhs (call
);
8554 int mask_index
= internal_fn_mask_index (ifn
);
8556 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
,
8557 /* ??? For SLP we only have operands for
8558 the mask operand. */
8559 slp_node
? 0 : mask_index
,
8560 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8564 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8565 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8569 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8570 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8571 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
8582 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8584 gcc_assert (ncopies
>= 1);
8586 /* FORNOW. This restriction should be relaxed. */
8587 if (nested_in_vect_loop
&& ncopies
> 1)
8589 if (dump_enabled_p ())
8590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8591 "multiple types in nested loop.\n");
8595 /* Invalidate assumptions made by dependence analysis when vectorization
8596 on the unrolled body effectively re-orders stmts. */
8598 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8599 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8600 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8602 if (dump_enabled_p ())
8603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8604 "cannot perform implicit CSE when unrolling "
8605 "with negative dependence distance\n");
8609 elem_type
= TREE_TYPE (vectype
);
8610 mode
= TYPE_MODE (vectype
);
8612 /* FORNOW. In some cases can vectorize even if data-type not supported
8613 (e.g. - data copies). */
8614 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8616 if (dump_enabled_p ())
8617 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8618 "Aligned load, but unsupported type.\n");
8622 /* Check if the load is a part of an interleaving chain. */
8623 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8625 grouped_load
= true;
8627 gcc_assert (!nested_in_vect_loop
);
8628 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8630 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8631 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8633 /* Refuse non-SLP vectorization of SLP-only groups. */
8634 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8636 if (dump_enabled_p ())
8637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8638 "cannot vectorize load in non-SLP mode.\n");
8642 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8648 /* In BB vectorization we may not actually use a loaded vector
8649 accessing elements in excess of DR_GROUP_SIZE. */
8650 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8651 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8652 unsigned HOST_WIDE_INT nunits
;
8653 unsigned j
, k
, maxk
= 0;
8654 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8657 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8658 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8659 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8663 "BB vectorization with gaps at the end of "
8664 "a load is not supported\n");
8671 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8674 if (dump_enabled_p ())
8675 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8677 "unsupported load permutation\n");
8682 /* Invalidate assumptions made by dependence analysis when vectorization
8683 on the unrolled body effectively re-orders stmts. */
8684 if (!PURE_SLP_STMT (stmt_info
)
8685 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8686 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8687 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8689 if (dump_enabled_p ())
8690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8691 "cannot perform implicit CSE when performing "
8692 "group loads with negative dependence distance\n");
8699 vect_memory_access_type memory_access_type
;
8700 enum dr_alignment_support alignment_support_scheme
;
8701 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8702 ncopies
, &memory_access_type
,
8703 &alignment_support_scheme
, &gs_info
))
8708 if (memory_access_type
== VMAT_CONTIGUOUS
)
8710 machine_mode vec_mode
= TYPE_MODE (vectype
);
8711 if (!VECTOR_MODE_P (vec_mode
)
8712 || !can_vec_mask_load_store_p (vec_mode
,
8713 TYPE_MODE (mask_vectype
), true))
8716 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8717 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8719 if (dump_enabled_p ())
8720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8721 "unsupported access type for masked load.\n");
8724 else if (memory_access_type
== VMAT_GATHER_SCATTER
8725 && gs_info
.ifn
== IFN_LAST
8728 if (dump_enabled_p ())
8729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8730 "unsupported masked emulated gather.\n");
8735 if (!vec_stmt
) /* transformation not required. */
8739 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8742 if (dump_enabled_p ())
8743 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8744 "incompatible vector types for invariants\n");
8749 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8752 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8753 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8754 group_size
, memory_access_type
,
8757 if (dump_enabled_p ()
8758 && memory_access_type
!= VMAT_ELEMENTWISE
8759 && memory_access_type
!= VMAT_GATHER_SCATTER
8760 && alignment_support_scheme
!= dr_aligned
)
8761 dump_printf_loc (MSG_NOTE
, vect_location
,
8762 "Vectorizing an unaligned access.\n");
8764 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8765 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8766 &gs_info
, slp_node
, cost_vec
);
8771 gcc_assert (memory_access_type
8772 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8774 if (dump_enabled_p ())
8775 dump_printf_loc (MSG_NOTE
, vect_location
,
8776 "transform load. ncopies = %d\n", ncopies
);
8780 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8781 ensure_base_align (dr_info
);
8783 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8785 vect_build_gather_load_calls (vinfo
,
8786 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8790 if (memory_access_type
== VMAT_INVARIANT
)
8792 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8793 /* If we have versioned for aliasing or the loop doesn't
8794 have any data dependencies that would preclude this,
8795 then we are sure this is a loop invariant load and
8796 thus we can insert it on the preheader edge. */
8797 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8798 && !nested_in_vect_loop
8799 && hoist_defs_of_uses (stmt_info
, loop
));
8802 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8803 if (dump_enabled_p ())
8804 dump_printf_loc (MSG_NOTE
, vect_location
,
8805 "hoisting out of the vectorized loop: %G", stmt
);
8806 scalar_dest
= copy_ssa_name (scalar_dest
);
8807 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8808 gsi_insert_on_edge_immediate
8809 (loop_preheader_edge (loop
),
8810 gimple_build_assign (scalar_dest
, rhs
));
8812 /* These copies are all equivalent, but currently the representation
8813 requires a separate STMT_VINFO_VEC_STMT for each one. */
8814 gimple_stmt_iterator gsi2
= *gsi
;
8816 for (j
= 0; j
< ncopies
; j
++)
8819 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8822 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8824 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8826 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8830 *vec_stmt
= new_stmt
;
8831 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8837 if (memory_access_type
== VMAT_ELEMENTWISE
8838 || memory_access_type
== VMAT_STRIDED_SLP
)
8840 gimple_stmt_iterator incr_gsi
;
8845 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8846 tree stride_base
, stride_step
, alias_off
;
8847 /* Checked by get_load_store_type. */
8848 unsigned int const_nunits
= nunits
.to_constant ();
8849 unsigned HOST_WIDE_INT cst_offset
= 0;
8852 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8853 gcc_assert (!nested_in_vect_loop
);
8857 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8858 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8862 first_stmt_info
= stmt_info
;
8863 first_dr_info
= dr_info
;
8865 if (slp
&& grouped_load
)
8867 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8868 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8874 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8875 * vect_get_place_in_interleaving_chain (stmt_info
,
8878 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8881 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8883 = fold_build_pointer_plus
8884 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8885 size_binop (PLUS_EXPR
,
8886 convert_to_ptrofftype (dr_offset
),
8887 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8888 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
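      /* Concretely (numbers invented for exposition): with stride 3, V4SI
	 and VF == 4 the generated body loads array[j], array[j+3],
	 array[j+6] and array[j+9], builds vectemp = {tmp1, tmp2, tmp3, tmp4}
	 with a CONSTRUCTOR and advances J by 12 elements per vector
	 iteration.  */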
8906 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8907 build_int_cst (TREE_TYPE (stride_step
), vf
));
8909 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8911 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8912 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8913 create_iv (stride_base
, ivstep
, NULL
,
8914 loop
, &incr_gsi
, insert_after
,
8917 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8919 running_off
= offvar
;
8920 alias_off
= build_int_cst (ref_type
, 0);
8921 int nloads
= const_nunits
;
8923 tree ltype
= TREE_TYPE (vectype
);
8924 tree lvectype
= vectype
;
8925 auto_vec
<tree
> dr_chain
;
8926 if (memory_access_type
== VMAT_STRIDED_SLP
)
8928 if (group_size
< const_nunits
)
8930 /* First check if vec_init optab supports construction from vector
8931 elts directly. Otherwise avoid emitting a constructor of
8932 vector elements by performing the loads using an integer type
8933 of the same size, constructing a vector of those and then
8934 re-interpreting it as the original vector type. This avoids a
8935 huge runtime penalty due to the general inability to perform
8936 store forwarding from smaller stores to a larger load. */
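	      /* Hypothetical example: a group of two floats loaded into a
		 V8SF vector.  Rather than eight SFmode loads feeding a
		 CONSTRUCTOR, four DImode loads are issued, combined into a
		 V4DI vector and reinterpreted:

		   tmp0 = MEM <long> [p];
		   tmp1 = MEM <long> [p + stride];
		   ...
		   vect_tmp = {tmp0, tmp1, tmp2, tmp3};
		   vect_x   = VIEW_CONVERT_EXPR<vector(8) float>(vect_tmp);

		 avoiding the store-forwarding penalty described above.
		 Types and counts are for illustration only.  */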
8939 = vector_vector_composition_type (vectype
,
8940 const_nunits
/ group_size
,
8942 if (vtype
!= NULL_TREE
)
8944 nloads
= const_nunits
/ group_size
;
8953 lnel
= const_nunits
;
8956 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8958 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8959 else if (nloads
== 1)
8964 /* For SLP permutation support we need to load the whole group,
8965 not only the number of vector stmts the permutation result
8969 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8971 unsigned int const_vf
= vf
.to_constant ();
8972 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8973 dr_chain
.create (ncopies
);
8976 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8978 unsigned int group_el
= 0;
8979 unsigned HOST_WIDE_INT
8980 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8981 for (j
= 0; j
< ncopies
; j
++)
8984 vec_alloc (v
, nloads
);
8985 gimple
*new_stmt
= NULL
;
8986 for (i
= 0; i
< nloads
; i
++)
8988 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8989 group_el
* elsz
+ cst_offset
);
8990 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8991 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8992 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8993 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8995 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8996 gimple_assign_lhs (new_stmt
));
9000 || group_el
== group_size
)
9002 tree newoff
= copy_ssa_name (running_off
);
9003 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9004 running_off
, stride_step
);
9005 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9007 running_off
= newoff
;
9013 tree vec_inv
= build_constructor (lvectype
, v
);
9014 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9015 vec_inv
, lvectype
, gsi
);
9016 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9017 if (lvectype
!= vectype
)
9019 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9021 build1 (VIEW_CONVERT_EXPR
,
9022 vectype
, new_temp
));
9023 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9030 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9032 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9037 *vec_stmt
= new_stmt
;
9038 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9044 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9050 if (memory_access_type
== VMAT_GATHER_SCATTER
9051 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9052 grouped_load
= false;
9056 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9057 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9058 /* For SLP vectorization we directly vectorize a subchain
9059 without permutation. */
9060 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9061 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9062 /* For BB vectorization always use the first stmt to base
9063 the data ref pointer on. */
9065 first_stmt_info_for_drptr
9066 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9068 /* Check if the chain of loads is already vectorized. */
9069 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ???  But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ???  With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
9080 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9083 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9086 /* VEC_NUM is the number of vect stmts to be created for this group. */
9089 grouped_load
= false;
9090 /* If an SLP permutation is from N elements to N elements,
9091 and if one vector holds a whole number of N, we can load
9092 the inputs to the permutation in the same way as an
9093 unpermuted sequence. In other cases we need to load the
9094 whole group, not only the number of vector stmts the
9095 permutation result fits in. */
9096 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9098 && (group_size
!= scalar_lanes
9099 || !multiple_p (nunits
, group_size
)))
9101 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9102 variable VF; see vect_transform_slp_perm_load. */
9103 unsigned int const_vf
= vf
.to_constant ();
9104 unsigned int const_nunits
= nunits
.to_constant ();
9105 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9106 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9110 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9112 = group_size
- scalar_lanes
;
9116 vec_num
= group_size
;
9118 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9122 first_stmt_info
= stmt_info
;
9123 first_dr_info
= dr_info
;
9124 group_size
= vec_num
= 1;
9126 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9129 gcc_assert (alignment_support_scheme
);
9130 vec_loop_masks
*loop_masks
9131 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9132 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9134 vec_loop_lens
*loop_lens
9135 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9136 ? &LOOP_VINFO_LENS (loop_vinfo
)
9139 /* Shouldn't go with length-based approach if fully masked. */
9140 gcc_assert (!loop_lens
|| !loop_masks
);
9142 /* Targets with store-lane instructions must not require explicit
9143 realignment. vect_supportable_dr_alignment always returns either
9144 dr_aligned or dr_unaligned_supported for masked operations. */
9145 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9148 || alignment_support_scheme
== dr_aligned
9149 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */
9183 /* In case of interleaving (non-unit grouped access):
9190 Vectorized loads are created in the order of memory accesses
9191 starting from the access of the first stmt of the chain:
9194 VS2: vx1 = &base + vec_size*1
9195 VS3: vx3 = &base + vec_size*2
9196 VS4: vx4 = &base + vec_size*3
9198 Then permutation statements are generated:
9200 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9201 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9204 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9205 (the order of the data-refs in the output of vect_permute_load_chain
9206 corresponds to the order of scalar stmts in the interleaving chain - see
9207 the documentation of vect_permute_load_chain()).
9208 The generation of permutation stmts and recording them in
9209 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9211 In case of both multiple types and interleaving, the vector loads and
9212 permutation stmts above are created for every copy. The result vector
9213 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9214 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9216 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9217 on a target that supports unaligned accesses (dr_unaligned_supported)
9218 we generate the following code:
9222 p = p + indx * vectype_size;
9227 Otherwise, the data reference is potentially unaligned on a target that
9228 does not support unaligned accesses (dr_explicit_realign_optimized) -
9229 then generate the following code, in which the data in each iteration is
9230 obtained by two vector loads, one from the previous iteration, and one
9231 from the current iteration:
9233 msq_init = *(floor(p1))
9234 p2 = initial_addr + VS - 1;
9235 realignment_token = call target_builtin;
9238 p2 = p2 + indx * vectype_size
9240 vec_dest = realign_load (msq, lsq, realignment_token)
9245 /* If the misalignment remains the same throughout the execution of the
9246 loop, we can create the init_addr and permutation mask at the loop
9247 preheader. Otherwise, it needs to be created inside the loop.
9248 This can only occur when vectorizing memory accesses in the inner-loop
9249 nested within an outer-loop that is being vectorized. */
9251 if (nested_in_vect_loop
9252 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9253 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9255 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9256 compute_in_loop
= true;
9259 bool diff_first_stmt_info
9260 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9262 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9263 || alignment_support_scheme
== dr_explicit_realign
)
9264 && !compute_in_loop
)
9266 /* If we have different first_stmt_info, we can't set up realignment
9267 here, since we can't guarantee first_stmt_info DR has been
9268 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9269 distance from first_stmt_info DR instead as below. */
9270 if (!diff_first_stmt_info
)
9271 msq
= vect_setup_realignment (vinfo
,
9272 first_stmt_info
, gsi
, &realignment_token
,
9273 alignment_support_scheme
, NULL_TREE
,
9275 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9277 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9278 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9280 gcc_assert (!first_stmt_info_for_drptr
);
9286 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9287 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9290 tree vec_offset
= NULL_TREE
;
9291 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9293 aggr_type
= NULL_TREE
;
9296 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9298 aggr_type
= elem_type
;
9299 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9300 &bump
, &vec_offset
);
9304 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9305 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9307 aggr_type
= vectype
;
9308 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9309 memory_access_type
);
9312 vec
<tree
> vec_offsets
= vNULL
;
9313 auto_vec
<tree
> vec_masks
;
9315 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9316 mask
, &vec_masks
, mask_vectype
, NULL_TREE
);
9317 tree vec_mask
= NULL_TREE
;
9318 poly_uint64 group_elt
= 0;
9319 for (j
= 0; j
< ncopies
; j
++)
9321 /* 1. Create the vector or array pointer update chain. */
9324 bool simd_lane_access_p
9325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9326 if (simd_lane_access_p
9327 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9328 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9329 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9330 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9331 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9332 get_alias_set (TREE_TYPE (ref_type
)))
9333 && (alignment_support_scheme
== dr_aligned
9334 || alignment_support_scheme
== dr_unaligned_supported
))
9336 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9337 dataref_offset
= build_int_cst (ref_type
, 0);
9339 else if (diff_first_stmt_info
)
9342 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9343 aggr_type
, at_loop
, offset
, &dummy
,
9344 gsi
, &ptr_incr
, simd_lane_access_p
,
9346 /* Adjust the pointer by the difference to first_stmt. */
9347 data_reference_p ptrdr
9348 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9350 = fold_convert (sizetype
,
9351 size_binop (MINUS_EXPR
,
9352 DR_INIT (first_dr_info
->dr
),
9354 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9356 if (alignment_support_scheme
== dr_explicit_realign
)
9358 msq
= vect_setup_realignment (vinfo
,
9359 first_stmt_info_for_drptr
, gsi
,
9361 alignment_support_scheme
,
9362 dataref_ptr
, &at_loop
);
9363 gcc_assert (!compute_in_loop
);
9366 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9368 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9369 &gs_info
, &dataref_ptr
,
9374 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9376 offset
, &dummy
, gsi
, &ptr_incr
,
9380 vec_mask
= vec_masks
[0];
9385 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9387 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9388 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9391 vec_mask
= vec_masks
[j
];
9394 if (grouped_load
|| slp_perm
)
9395 dr_chain
.create (vec_num
);
9397 gimple
*new_stmt
= NULL
;
9398 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9402 vec_array
= create_vector_array (vectype
, vec_num
);
9404 tree final_mask
= NULL_TREE
;
9406 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9409 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9416 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9418 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9419 tree alias_ptr
= build_int_cst (ref_type
, align
);
9420 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9421 dataref_ptr
, alias_ptr
,
9427 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9428 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9429 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9431 gimple_call_set_lhs (call
, vec_array
);
9432 gimple_call_set_nothrow (call
, true);
9433 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9436 /* Extract each vector into an SSA_NAME. */
9437 for (i
= 0; i
< vec_num
; i
++)
9439 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9441 dr_chain
.quick_push (new_temp
);
9444 /* Record the mapping between SSA_NAMEs and statements. */
9445 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9447 /* Record that VEC_ARRAY is now dead. */
9448 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9452 for (i
= 0; i
< vec_num
; i
++)
9454 tree final_mask
= NULL_TREE
;
9456 && memory_access_type
!= VMAT_INVARIANT
)
9457 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9459 vectype
, vec_num
* j
+ i
);
9461 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9465 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9466 gsi
, stmt_info
, bump
);
9468 /* 2. Create the vector-load in the loop. */
9469 switch (alignment_support_scheme
)
9472 case dr_unaligned_supported
:
9474 unsigned int misalign
;
9475 unsigned HOST_WIDE_INT align
;
9477 if (memory_access_type
== VMAT_GATHER_SCATTER
9478 && gs_info
.ifn
!= IFN_LAST
)
9480 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9481 vec_offset
= vec_offsets
[j
];
9482 tree zero
= build_zero_cst (vectype
);
9483 tree scale
= size_int (gs_info
.scale
);
9486 call
= gimple_build_call_internal
9487 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9488 vec_offset
, scale
, zero
, final_mask
);
9490 call
= gimple_build_call_internal
9491 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9492 vec_offset
, scale
, zero
);
9493 gimple_call_set_nothrow (call
, true);
9495 data_ref
= NULL_TREE
;
9498 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9500 /* Emulated gather-scatter. */
9501 gcc_assert (!final_mask
);
9502 unsigned HOST_WIDE_INT const_nunits
9503 = nunits
.to_constant ();
9504 unsigned HOST_WIDE_INT const_offset_nunits
9505 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9507 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9508 vec_alloc (ctor_elts
, const_nunits
);
9509 gimple_seq stmts
= NULL
;
9510 /* We support offset vectors with more elements
9511 than the data vector for now. */
9512 unsigned HOST_WIDE_INT factor
9513 = const_offset_nunits
/ const_nunits
;
9514 vec_offset
= vec_offsets
[j
/ factor
];
9515 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9516 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9517 tree scale
= size_int (gs_info
.scale
);
9519 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9520 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9522 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9524 tree boff
= size_binop (MULT_EXPR
,
9525 TYPE_SIZE (idx_type
),
9528 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9529 idx_type
, vec_offset
,
9530 TYPE_SIZE (idx_type
),
9532 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9533 idx
= gimple_build (&stmts
, MULT_EXPR
,
9534 sizetype
, idx
, scale
);
9535 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9536 TREE_TYPE (dataref_ptr
),
9538 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9539 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9540 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9541 build_int_cst (ref_type
, 0));
9542 new_stmt
= gimple_build_assign (elt
, ref
);
9543 gimple_seq_add_stmt (&stmts
, new_stmt
);
9544 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9546 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9547 new_stmt
= gimple_build_assign (NULL_TREE
,
9549 (vectype
, ctor_elts
));
9550 data_ref
= NULL_TREE
;
9555 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9556 if (alignment_support_scheme
== dr_aligned
)
9558 gcc_assert (aligned_access_p (first_dr_info
, vectype
));
9561 else if (dr_misalignment (first_dr_info
, vectype
) == -1)
9563 align
= dr_alignment
9564 (vect_dr_behavior (vinfo
, first_dr_info
));
9568 misalign
= dr_misalignment (first_dr_info
, vectype
);
9569 if (dataref_offset
== NULL_TREE
9570 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9571 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9573 align
= least_bit_hwi (misalign
| align
);
9577 tree ptr
= build_int_cst (ref_type
,
9578 align
* BITS_PER_UNIT
);
9580 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9583 gimple_call_set_nothrow (call
, true);
9585 data_ref
= NULL_TREE
;
9587 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9590 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9593 tree ptr
= build_int_cst (ref_type
,
9594 align
* BITS_PER_UNIT
);
9596 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9599 gimple_call_set_nothrow (call
, true);
9601 data_ref
= NULL_TREE
;
9603 /* Need conversion if it's wrapped with VnQI. */
9604 machine_mode vmode
= TYPE_MODE (vectype
);
9605 opt_machine_mode new_ovmode
9606 = get_len_load_store_mode (vmode
, true);
9607 machine_mode new_vmode
= new_ovmode
.require ();
9608 if (vmode
!= new_vmode
)
9610 tree qi_type
= unsigned_intQI_type_node
;
9612 = build_vector_type_for_mode (qi_type
, new_vmode
);
9613 tree var
= vect_get_new_ssa_name (new_vtype
,
9615 gimple_set_lhs (call
, var
);
9616 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9618 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9620 = gimple_build_assign (vec_dest
,
9621 VIEW_CONVERT_EXPR
, op
);
9626 tree ltype
= vectype
;
9627 tree new_vtype
= NULL_TREE
;
9628 unsigned HOST_WIDE_INT gap
9629 = DR_GROUP_GAP (first_stmt_info
);
9630 unsigned int vect_align
9631 = vect_known_alignment_in_bytes (first_dr_info
,
9633 unsigned int scalar_dr_size
9634 = vect_get_scalar_dr_size (first_dr_info
);
9635 /* If there's no peeling for gaps but we have a gap
9636 with slp loads then load the lower half of the
9637 vector only. See get_group_load_store_type for
9638 when we apply this optimization. */
9641 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9643 && known_eq (nunits
, (group_size
- gap
) * 2)
9644 && known_eq (nunits
, group_size
)
9645 && gap
>= (vect_align
/ scalar_dr_size
))
9649 = vector_vector_composition_type (vectype
, 2,
9651 if (new_vtype
!= NULL_TREE
)
9655 = (dataref_offset
? dataref_offset
9656 : build_int_cst (ref_type
, 0));
9657 if (ltype
!= vectype
9658 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9660 unsigned HOST_WIDE_INT gap_offset
9661 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9662 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9663 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9666 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9667 if (alignment_support_scheme
== dr_aligned
)
9670 TREE_TYPE (data_ref
)
9671 = build_aligned_type (TREE_TYPE (data_ref
),
9672 align
* BITS_PER_UNIT
);
9673 if (ltype
!= vectype
)
9675 vect_copy_ref_info (data_ref
,
9676 DR_REF (first_dr_info
->dr
));
9677 tree tem
= make_ssa_name (ltype
);
9678 new_stmt
= gimple_build_assign (tem
, data_ref
);
9679 vect_finish_stmt_generation (vinfo
, stmt_info
,
9682 vec
<constructor_elt
, va_gc
> *v
;
9684 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9686 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9687 build_zero_cst (ltype
));
9688 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9692 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9693 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9694 build_zero_cst (ltype
));
9696 gcc_assert (new_vtype
!= NULL_TREE
);
9697 if (new_vtype
== vectype
)
9698 new_stmt
= gimple_build_assign (
9699 vec_dest
, build_constructor (vectype
, v
));
9702 tree new_vname
= make_ssa_name (new_vtype
);
9703 new_stmt
= gimple_build_assign (
9704 new_vname
, build_constructor (new_vtype
, v
));
9705 vect_finish_stmt_generation (vinfo
, stmt_info
,
9707 new_stmt
= gimple_build_assign (
9708 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9715 case dr_explicit_realign
:
9719 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9721 if (compute_in_loop
)
9722 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9724 dr_explicit_realign
,
9727 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9728 ptr
= copy_ssa_name (dataref_ptr
);
9730 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
// For explicit realign the target alignment should be
// known at compile time.
9733 unsigned HOST_WIDE_INT align
=
9734 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9735 new_stmt
= gimple_build_assign
9736 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9738 (TREE_TYPE (dataref_ptr
),
9739 -(HOST_WIDE_INT
) align
));
9740 vect_finish_stmt_generation (vinfo
, stmt_info
,
9743 = build2 (MEM_REF
, vectype
, ptr
,
9744 build_int_cst (ref_type
, 0));
9745 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9746 vec_dest
= vect_create_destination_var (scalar_dest
,
9748 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9749 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9750 gimple_assign_set_lhs (new_stmt
, new_temp
);
9751 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9752 vect_finish_stmt_generation (vinfo
, stmt_info
,
9756 bump
= size_binop (MULT_EXPR
, vs
,
9757 TYPE_SIZE_UNIT (elem_type
));
9758 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9759 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9761 new_stmt
= gimple_build_assign
9762 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9764 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9765 ptr
= copy_ssa_name (ptr
, new_stmt
);
9766 gimple_assign_set_lhs (new_stmt
, ptr
);
9767 vect_finish_stmt_generation (vinfo
, stmt_info
,
9770 = build2 (MEM_REF
, vectype
, ptr
,
9771 build_int_cst (ref_type
, 0));
9774 case dr_explicit_realign_optimized
:
9776 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9777 new_temp
= copy_ssa_name (dataref_ptr
);
9779 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
// We should only be doing this if we know the target
// alignment at compile time.
9782 unsigned HOST_WIDE_INT align
=
9783 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9784 new_stmt
= gimple_build_assign
9785 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9786 build_int_cst (TREE_TYPE (dataref_ptr
),
9787 -(HOST_WIDE_INT
) align
));
9788 vect_finish_stmt_generation (vinfo
, stmt_info
,
9791 = build2 (MEM_REF
, vectype
, new_temp
,
9792 build_int_cst (ref_type
, 0));
9798 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* DATA_REF is null if we've already built the statement.  */
9802 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9803 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9805 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9806 gimple_set_lhs (new_stmt
, new_temp
);
9807 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* 3. Handle explicit realignment if necessary/supported.

     vec_dest = realign_load (msq, lsq, realignment_token)  */
9812 if (alignment_support_scheme
== dr_explicit_realign_optimized
9813 || alignment_support_scheme
== dr_explicit_realign
)
9815 lsq
= gimple_assign_lhs (new_stmt
);
9816 if (!realignment_token
)
9817 realignment_token
= dataref_ptr
;
9818 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9819 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9820 msq
, lsq
, realignment_token
);
9821 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9822 gimple_assign_set_lhs (new_stmt
, new_temp
);
9823 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9825 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9828 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9829 add_phi_arg (phi
, lsq
,
9830 loop_latch_edge (containing_loop
),
9836 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9838 tree perm_mask
= perm_mask_for_reverse (vectype
);
9839 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9840 perm_mask
, stmt_info
, gsi
);
9841 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
/* Collect vector loads and later create their permutation in
   vect_transform_grouped_load ().  */
9846 if (grouped_load
|| slp_perm
)
9847 dr_chain
.quick_push (new_temp
);
/* Store vector loads in the corresponding SLP_NODE.  */
9850 if (slp
&& !slp_perm
)
9851 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* With SLP permutation we load the gaps as well; without it
   we need to skip the gaps after we manage to fully load
   all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9856 group_elt
+= nunits
;
9857 if (maybe_ne (group_gap_adj
, 0U)
9859 && known_eq (group_elt
, group_size
- group_gap_adj
))
9861 poly_wide_int bump_val
9862 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9864 if (tree_int_cst_sgn
9865 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9866 bump_val
= -bump_val
;
9867 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9868 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9869 gsi
, stmt_info
, bump
);
/* Bump the vector pointer to account for a gap or for excess
   elements loaded for a permuted SLP load.  */
9875 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9877 poly_wide_int bump_val
9878 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9880 if (tree_int_cst_sgn
9881 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9882 bump_val
= -bump_val
;
9883 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9884 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9889 if (slp
&& !slp_perm
)
/* For SLP we know we've seen all possible uses of dr_chain so
   direct vect_transform_slp_perm_load to DCE the unused parts.
   ??? This is a hack to prevent compile-time issues as seen
   in PR101120 and friends.  */
9899 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9900 gsi
, vf
, false, &n_perms
,
9908 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9909 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9911 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9915 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9918 dr_chain
.release ();
9921 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
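/* For instance, given a scalar statement like

     x_5 = a_1 < b_2 ? c_3 : d_4;

   the COND checked here is "a_1 < b_2"; each of its operands must be a
   constant, an invariant, or an SSA name defined inside the region being
   vectorized.  (Illustrative example; the SSA names are made up.)  */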
9940 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9941 slp_tree slp_node
, tree
*comp_vectype
,
9942 enum vect_def_type
*dts
, tree vectype
)
9945 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9949 if (TREE_CODE (cond
) == SSA_NAME
9950 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9952 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9953 &slp_op
, &dts
[0], comp_vectype
)
9955 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9960 if (!COMPARISON_CLASS_P (cond
))
9963 lhs
= TREE_OPERAND (cond
, 0);
9964 rhs
= TREE_OPERAND (cond
, 1);
9966 if (TREE_CODE (lhs
) == SSA_NAME
)
9968 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
9969 &lhs
, &slp_op
, &dts
[0], &vectype1
))
9972 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9973 || TREE_CODE (lhs
) == FIXED_CST
)
9974 dts
[0] = vect_constant_def
;
9978 if (TREE_CODE (rhs
) == SSA_NAME
)
9980 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
9981 &rhs
, &slp_op
, &dts
[1], &vectype2
))
9984 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9985 || TREE_CODE (rhs
) == FIXED_CST
)
9986 dts
[1] = vect_constant_def
;
9990 if (vectype1
&& vectype2
9991 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9992 TYPE_VECTOR_SUBPARTS (vectype2
)))
9995 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9996 /* Invariant comparison. */
9997 if (! *comp_vectype
)
9999 tree scalar_type
= TREE_TYPE (lhs
);
10000 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10001 *comp_vectype
= truth_type_for (vectype
);
/* If we can widen the comparison to match vectype, do so.  */
10005 if (INTEGRAL_TYPE_P (scalar_type
)
10007 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10008 TYPE_SIZE (TREE_TYPE (vectype
))))
10009 scalar_type
= build_nonstandard_integer_type
10010 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10011 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
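/* Illustrative sketch of the transformation (made-up SSA names):

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes a vector comparison feeding a VEC_COND_EXPR:

     vcmp_6 = vect_a < vect_b;
     vect_x_7 = VEC_COND_EXPR <vcmp_6, vect_c, vect_d>;  */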
10031 vectorizable_condition (vec_info
*vinfo
,
10032 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10034 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10036 tree scalar_dest
= NULL_TREE
;
10037 tree vec_dest
= NULL_TREE
;
10038 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10039 tree then_clause
, else_clause
;
10040 tree comp_vectype
= NULL_TREE
;
10041 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10042 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10045 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10046 enum vect_def_type dts
[4]
10047 = {vect_unknown_def_type
, vect_unknown_def_type
,
10048 vect_unknown_def_type
, vect_unknown_def_type
};
10052 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10054 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10055 vec
<tree
> vec_oprnds0
= vNULL
;
10056 vec
<tree
> vec_oprnds1
= vNULL
;
10057 vec
<tree
> vec_oprnds2
= vNULL
;
10058 vec
<tree
> vec_oprnds3
= vNULL
;
10060 bool masked
= false;
10062 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10065 /* Is vectorizable conditional operation? */
10066 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10070 code
= gimple_assign_rhs_code (stmt
);
10071 if (code
!= COND_EXPR
)
10074 stmt_vec_info reduc_info
= NULL
;
10075 int reduc_index
= -1;
10076 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10078 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10081 if (STMT_SLP_TYPE (stmt_info
))
10083 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10084 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10085 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10086 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10087 || reduc_index
!= -1);
10091 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10095 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10096 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10101 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10105 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10109 gcc_assert (ncopies
>= 1);
10110 if (for_reduction
&& ncopies
> 1)
10111 return false; /* FORNOW */
10113 cond_expr
= gimple_assign_rhs1 (stmt
);
10115 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10116 &comp_vectype
, &dts
[0], vectype
)
10120 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10121 slp_tree then_slp_node
, else_slp_node
;
10122 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10123 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10125 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10126 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10129 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10132 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10135 masked
= !COMPARISON_CLASS_P (cond_expr
);
10136 vec_cmp_type
= truth_type_for (comp_vectype
);
10138 if (vec_cmp_type
== NULL_TREE
)
10141 cond_code
= TREE_CODE (cond_expr
);
10144 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10145 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
/* For conditional reductions, the "then" value needs to be the candidate
   value calculated by this iteration while the "else" value needs to be
   the result carried over from previous iterations.  If the COND_EXPR
   is the other way around, we need to swap it.  */
10152 bool must_invert_cmp_result
= false;
10153 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10156 must_invert_cmp_result
= true;
10159 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10160 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10161 if (new_code
== ERROR_MARK
)
10162 must_invert_cmp_result
= true;
10165 cond_code
= new_code
;
10166 /* Make sure we don't accidentally use the old condition. */
10167 cond_expr
= NULL_TREE
;
10170 std::swap (then_clause
, else_clause
);
10173 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
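/* Concretely, for single-bit boolean elements the comparisons reduce to
   bit operations (added as an illustration of the assignments below):

     a >  b  ->  a & ~b
     a >= b  ->  a | ~b
     a <  b  ->  b & ~a
     a <= b  ->  b | ~a
     a == b  ->  ~(a ^ b)
     a != b  ->  a ^ b  */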
10184 bitop1
= BIT_NOT_EXPR
;
10185 bitop2
= BIT_AND_EXPR
;
10188 bitop1
= BIT_NOT_EXPR
;
10189 bitop2
= BIT_IOR_EXPR
;
10192 bitop1
= BIT_NOT_EXPR
;
10193 bitop2
= BIT_AND_EXPR
;
10194 std::swap (cond_expr0
, cond_expr1
);
10197 bitop1
= BIT_NOT_EXPR
;
10198 bitop2
= BIT_IOR_EXPR
;
10199 std::swap (cond_expr0
, cond_expr1
);
10202 bitop1
= BIT_XOR_EXPR
;
10205 bitop1
= BIT_XOR_EXPR
;
10206 bitop2
= BIT_NOT_EXPR
;
10211 cond_code
= SSA_NAME
;
10214 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10215 && reduction_type
== EXTRACT_LAST_REDUCTION
10216 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10218 if (dump_enabled_p ())
10219 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10220 "reduction comparison operation not supported.\n");
10226 if (bitop1
!= NOP_EXPR
)
10228 machine_mode mode
= TYPE_MODE (comp_vectype
);
10231 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10232 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10235 if (bitop2
!= NOP_EXPR
)
10237 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10239 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10244 vect_cost_for_stmt kind
= vector_stmt
;
10245 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10246 /* Count one reduction-like operation per vector. */
10247 kind
= vec_to_scalar
;
10248 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10252 && (!vect_maybe_update_slp_op_vectype
10253 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10255 && !vect_maybe_update_slp_op_vectype
10256 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10257 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10258 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10260 if (dump_enabled_p ())
10261 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10262 "incompatible vector types for invariants\n");
10266 if (loop_vinfo
&& for_reduction
10267 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10269 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10270 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10271 ncopies
* vec_num
, vectype
, NULL
);
10272 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10273 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10275 if (dump_enabled_p ())
10276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10277 "conditional reduction prevents the use"
10278 " of partial vectors.\n");
10279 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10283 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10284 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10292 scalar_dest
= gimple_assign_lhs (stmt
);
10293 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10294 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10296 bool swap_cond_operands
= false;
/* See whether another part of the vectorized code applies a loop
   mask to the condition, or to its inverse.  */
10301 vec_loop_masks
*masks
= NULL
;
10302 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10304 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10305 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10308 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10309 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10310 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10313 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10314 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10315 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10317 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10318 cond_code
= cond
.code
;
10319 swap_cond_operands
= true;
10325 /* Handle cond expr. */
10327 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10328 cond_expr
, &vec_oprnds0
, comp_vectype
,
10329 then_clause
, &vec_oprnds2
, vectype
,
10330 reduction_type
!= EXTRACT_LAST_REDUCTION
10331 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10333 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10334 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10335 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10336 then_clause
, &vec_oprnds2
, vectype
,
10337 reduction_type
!= EXTRACT_LAST_REDUCTION
10338 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10340 /* Arguments are ready. Create the new vector stmt. */
10341 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10343 vec_then_clause
= vec_oprnds2
[i
];
10344 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10345 vec_else_clause
= vec_oprnds3
[i
];
10347 if (swap_cond_operands
)
10348 std::swap (vec_then_clause
, vec_else_clause
);
10351 vec_compare
= vec_cond_lhs
;
10354 vec_cond_rhs
= vec_oprnds1
[i
];
10355 if (bitop1
== NOP_EXPR
)
10357 gimple_seq stmts
= NULL
;
10358 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10359 vec_cond_lhs
, vec_cond_rhs
);
10360 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10364 new_temp
= make_ssa_name (vec_cmp_type
);
10366 if (bitop1
== BIT_NOT_EXPR
)
10367 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10371 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10373 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10374 if (bitop2
== NOP_EXPR
)
10375 vec_compare
= new_temp
;
10376 else if (bitop2
== BIT_NOT_EXPR
)
10378 /* Instead of doing ~x ? y : z do x ? z : y. */
10379 vec_compare
= new_temp
;
10380 std::swap (vec_then_clause
, vec_else_clause
);
10384 vec_compare
= make_ssa_name (vec_cmp_type
);
10386 = gimple_build_assign (vec_compare
, bitop2
,
10387 vec_cond_lhs
, new_temp
);
10388 vect_finish_stmt_generation (vinfo
, stmt_info
,
/* If we decided to apply a loop mask to the result of the vector
   comparison, AND the comparison with the mask now.  Later passes
   should then be able to reuse the AND results between multiple
   vector statements.  For example:

     for (int i = 0; i < 100; ++i)
       x[i] = y[i] ? z[i] : 10;

   results in following optimized GIMPLE:

     mask__35.8_43 = vect__4.7_41 != { 0, ... };
     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
				       vect_iftmp.11_47, { 10, ... }>;

   instead of using masked and unmasked forms of
   vec != { 0, ... } (masked in the MASK_LOAD,
   unmasked in the VEC_COND_EXPR).  */
/* Force vec_compare to be an SSA_NAME rather than a comparison,
   in cases where that's necessary.  */
10419 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10421 if (!is_gimple_val (vec_compare
))
10423 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10424 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10426 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10427 vec_compare
= vec_compare_name
;
10430 if (must_invert_cmp_result
)
10432 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10433 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10436 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10437 vec_compare
= vec_compare_name
;
10443 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10445 tree tmp2
= make_ssa_name (vec_cmp_type
);
10447 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10449 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10450 vec_compare
= tmp2
;
10455 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10457 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10458 tree lhs
= gimple_get_lhs (old_stmt
);
10459 new_stmt
= gimple_build_call_internal
10460 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10462 gimple_call_set_lhs (new_stmt
, lhs
);
10463 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10464 if (old_stmt
== gsi_stmt (*gsi
))
10465 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
/* In this case we're moving the definition to later in the
   block.  That doesn't matter because the only uses of the
   lhs are in phi statements.  */
10471 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10472 gsi_remove (&old_gsi
, true);
10473 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10478 new_temp
= make_ssa_name (vec_dest
);
10479 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10480 vec_then_clause
, vec_else_clause
);
10481 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10484 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10486 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10490 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10492 vec_oprnds0
.release ();
10493 vec_oprnds1
.release ();
10494 vec_oprnds2
.release ();
10495 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
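/* Illustrative sketch (made-up SSA names): a scalar mask-producing
   comparison

     flag_1 = a_2 > b_3;

   becomes a vector comparison whose result is a boolean mask vector:

     mask_4 = vect_a > vect_b;  */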
10509 vectorizable_comparison (vec_info
*vinfo
,
10510 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10512 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10514 tree lhs
, rhs1
, rhs2
;
10515 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10516 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10517 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10519 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10520 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10522 poly_uint64 nunits
;
10524 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10526 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10527 vec
<tree
> vec_oprnds0
= vNULL
;
10528 vec
<tree
> vec_oprnds1
= vNULL
;
10532 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10535 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10538 mask_type
= vectype
;
10539 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10544 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10546 gcc_assert (ncopies
>= 1);
10547 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10550 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10554 code
= gimple_assign_rhs_code (stmt
);
10556 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10559 slp_tree slp_rhs1
, slp_rhs2
;
10560 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10561 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10564 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10565 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10568 if (vectype1
&& vectype2
10569 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10570 TYPE_VECTOR_SUBPARTS (vectype2
)))
10573 vectype
= vectype1
? vectype1
: vectype2
;
10575 /* Invariant comparison. */
10578 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10579 vectype
= mask_type
;
10581 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10583 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10586 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10589 /* Can't compare mask and non-mask types. */
10590 if (vectype1
&& vectype2
10591 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
     BITOP2 (rhs1 BITOP1 rhs2) or
     rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
10601 bool swap_p
= false;
10602 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10604 if (code
== GT_EXPR
)
10606 bitop1
= BIT_NOT_EXPR
;
10607 bitop2
= BIT_AND_EXPR
;
10609 else if (code
== GE_EXPR
)
10611 bitop1
= BIT_NOT_EXPR
;
10612 bitop2
= BIT_IOR_EXPR
;
10614 else if (code
== LT_EXPR
)
10616 bitop1
= BIT_NOT_EXPR
;
10617 bitop2
= BIT_AND_EXPR
;
10620 else if (code
== LE_EXPR
)
10622 bitop1
= BIT_NOT_EXPR
;
10623 bitop2
= BIT_IOR_EXPR
;
10628 bitop1
= BIT_XOR_EXPR
;
10629 if (code
== EQ_EXPR
)
10630 bitop2
= BIT_NOT_EXPR
;
10636 if (bitop1
== NOP_EXPR
)
10638 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10643 machine_mode mode
= TYPE_MODE (vectype
);
10646 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10647 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10650 if (bitop2
!= NOP_EXPR
)
10652 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10653 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10658 /* Put types on constant and invariant SLP children. */
10660 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10661 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10663 if (dump_enabled_p ())
10664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10665 "incompatible vector types for invariants\n");
10669 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10670 vect_model_simple_cost (vinfo
, stmt_info
,
10671 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10672 dts
, ndts
, slp_node
, cost_vec
);
10679 lhs
= gimple_assign_lhs (stmt
);
10680 mask
= vect_create_destination_var (lhs
, mask_type
);
10682 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10683 rhs1
, &vec_oprnds0
, vectype
,
10684 rhs2
, &vec_oprnds1
, vectype
);
10686 std::swap (vec_oprnds0
, vec_oprnds1
);
10688 /* Arguments are ready. Create the new vector stmt. */
10689 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10692 vec_rhs2
= vec_oprnds1
[i
];
10694 new_temp
= make_ssa_name (mask
);
10695 if (bitop1
== NOP_EXPR
)
10697 new_stmt
= gimple_build_assign (new_temp
, code
,
10698 vec_rhs1
, vec_rhs2
);
10699 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10703 if (bitop1
== BIT_NOT_EXPR
)
10704 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10706 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10708 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10709 if (bitop2
!= NOP_EXPR
)
10711 tree res
= make_ssa_name (mask
);
10712 if (bitop2
== BIT_NOT_EXPR
)
10713 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10715 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10717 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10721 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10723 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10727 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10729 vec_oprnds0
.release ();
10730 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
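/* A statement is "live" when its scalar result is still used outside the
   vectorized region, e.g. a value computed inside the loop and read after
   the loop (see the "used out of the loop" handling in vect_analyze_stmt
   below).  */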
10741 can_vectorize_live_stmts (vec_info
*vinfo
,
10742 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10743 slp_tree slp_node
, slp_instance slp_node_instance
,
10745 stmt_vector_for_cost
*cost_vec
)
10749 stmt_vec_info slp_stmt_info
;
10751 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10753 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10754 && !vectorizable_live_operation (vinfo
,
10755 slp_stmt_info
, gsi
, slp_node
,
10756 slp_node_instance
, i
,
10757 vec_stmt_p
, cost_vec
))
10761 else if (STMT_VINFO_LIVE_P (stmt_info
)
10762 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10763 slp_node
, slp_node_instance
, -1,
10764 vec_stmt_p
, cost_vec
))
/* Make sure the statement is vectorizable.  */
10773 vect_analyze_stmt (vec_info
*vinfo
,
10774 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10775 slp_tree node
, slp_instance node_instance
,
10776 stmt_vector_for_cost
*cost_vec
)
10778 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10779 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10781 gimple_seq pattern_def_seq
;
10783 if (dump_enabled_p ())
10784 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10787 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10788 return opt_result::failure_at (stmt_info
->stmt
,
10790 " stmt has volatile operands: %G\n",
10793 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10795 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10797 gimple_stmt_iterator si
;
10799 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10801 stmt_vec_info pattern_def_stmt_info
10802 = vinfo
->lookup_stmt (gsi_stmt (si
));
10803 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10804 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10806 /* Analyze def stmt of STMT if it's a pattern stmt. */
10807 if (dump_enabled_p ())
10808 dump_printf_loc (MSG_NOTE
, vect_location
,
10809 "==> examining pattern def statement: %G",
10810 pattern_def_stmt_info
->stmt
);
10813 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10814 need_to_vectorize
, node
, node_instance
,
/* Skip stmts that do not need to be vectorized.  In loops this is expected
   to include:
   - the COND_EXPR which is the loop exit condition
   - any LABEL_EXPRs in the loop
   - computations that are used only for array indexing or loop control.
   In basic blocks we only analyze statements that are a part of some SLP
   instance, therefore, all the statements are relevant.

   Pattern statement needs to be analyzed instead of the original statement
   if the original statement is not relevant.  Otherwise, we analyze both
   statements.  In basic blocks we are called from some SLP instance
   traversal; don't analyze pattern stmts instead, the pattern stmts
   already will be part of the SLP instance.  */
10836 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10837 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10838 && !STMT_VINFO_LIVE_P (stmt_info
))
10840 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10841 && pattern_stmt_info
10842 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10843 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10845 /* Analyze PATTERN_STMT instead of the original stmt. */
10846 stmt_info
= pattern_stmt_info
;
10847 if (dump_enabled_p ())
10848 dump_printf_loc (MSG_NOTE
, vect_location
,
10849 "==> examining pattern statement: %G",
10854 if (dump_enabled_p ())
10855 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10857 return opt_result::success ();
10860 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10862 && pattern_stmt_info
10863 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10864 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10866 /* Analyze PATTERN_STMT too. */
10867 if (dump_enabled_p ())
10868 dump_printf_loc (MSG_NOTE
, vect_location
,
10869 "==> examining pattern statement: %G",
10870 pattern_stmt_info
->stmt
);
10873 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10874 node_instance
, cost_vec
);
10879 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10881 case vect_internal_def
:
10884 case vect_reduction_def
:
10885 case vect_nested_cycle
:
10886 gcc_assert (!bb_vinfo
10887 && (relevance
== vect_used_in_outer
10888 || relevance
== vect_used_in_outer_by_reduction
10889 || relevance
== vect_used_by_reduction
10890 || relevance
== vect_unused_in_scope
10891 || relevance
== vect_used_only_live
));
10894 case vect_induction_def
:
10895 gcc_assert (!bb_vinfo
);
10898 case vect_constant_def
:
10899 case vect_external_def
:
10900 case vect_unknown_def_type
:
10902 gcc_unreachable ();
10905 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10907 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
10909 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10911 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10912 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10913 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10914 *need_to_vectorize
= true;
10917 if (PURE_SLP_STMT (stmt_info
) && !node
)
10919 if (dump_enabled_p ())
10920 dump_printf_loc (MSG_NOTE
, vect_location
,
10921 "handled only by SLP analysis\n");
10922 return opt_result::success ();
10927 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10928 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
/* Prefer vectorizable_call over vectorizable_simd_clone_call so
   -mveclibabi= takes preference over library functions with
   the simd attribute.  */
10932 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10933 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
10935 || vectorizable_conversion (vinfo
, stmt_info
,
10936 NULL
, NULL
, node
, cost_vec
)
10937 || vectorizable_operation (vinfo
, stmt_info
,
10938 NULL
, NULL
, node
, cost_vec
)
10939 || vectorizable_assignment (vinfo
, stmt_info
,
10940 NULL
, NULL
, node
, cost_vec
)
10941 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10942 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10943 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10944 node
, node_instance
, cost_vec
)
10945 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10946 NULL
, node
, cost_vec
)
10947 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10948 || vectorizable_condition (vinfo
, stmt_info
,
10949 NULL
, NULL
, node
, cost_vec
)
10950 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10952 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10953 stmt_info
, NULL
, node
));
10957 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10958 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
10959 NULL
, NULL
, node
, cost_vec
)
10960 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
10962 || vectorizable_shift (vinfo
, stmt_info
,
10963 NULL
, NULL
, node
, cost_vec
)
10964 || vectorizable_operation (vinfo
, stmt_info
,
10965 NULL
, NULL
, node
, cost_vec
)
10966 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
10968 || vectorizable_load (vinfo
, stmt_info
,
10969 NULL
, NULL
, node
, cost_vec
)
10970 || vectorizable_store (vinfo
, stmt_info
,
10971 NULL
, NULL
, node
, cost_vec
)
10972 || vectorizable_condition (vinfo
, stmt_info
,
10973 NULL
, NULL
, node
, cost_vec
)
10974 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10976 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
10980 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
10983 return opt_result::failure_at (stmt_info
->stmt
,
10985 " relevant stmt not supported: %G",
/* Stmts that are (also) "live" (i.e. used outside of the loop)
   need extra handling, except for vectorizable reductions.  */
10991 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10992 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
10993 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
10994 stmt_info
, NULL
, node
, node_instance
,
10996 return opt_result::failure_at (stmt_info
->stmt
,
10998 " live stmt not supported: %G",
11001 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
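/* The switch below dispatches on the STMT_VINFO_TYPE recorded for
   STMT_INFO during analysis; for example load_vec_info_type is handled by
   vectorizable_load and condition_vec_info_type by vectorizable_condition.  */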
11010 vect_transform_stmt (vec_info
*vinfo
,
11011 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11012 slp_tree slp_node
, slp_instance slp_node_instance
)
11014 bool is_store
= false;
11015 gimple
*vec_stmt
= NULL
;
11018 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11020 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11022 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11024 switch (STMT_VINFO_TYPE (stmt_info
))
11026 case type_demotion_vec_info_type
:
11027 case type_promotion_vec_info_type
:
11028 case type_conversion_vec_info_type
:
11029 done
= vectorizable_conversion (vinfo
, stmt_info
,
11030 gsi
, &vec_stmt
, slp_node
, NULL
);
11034 case induc_vec_info_type
:
11035 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11036 stmt_info
, &vec_stmt
, slp_node
,
11041 case shift_vec_info_type
:
11042 done
= vectorizable_shift (vinfo
, stmt_info
,
11043 gsi
, &vec_stmt
, slp_node
, NULL
);
11047 case op_vec_info_type
:
11048 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11053 case assignment_vec_info_type
:
11054 done
= vectorizable_assignment (vinfo
, stmt_info
,
11055 gsi
, &vec_stmt
, slp_node
, NULL
);
11059 case load_vec_info_type
:
11060 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11065 case store_vec_info_type
:
11066 done
= vectorizable_store (vinfo
, stmt_info
,
11067 gsi
, &vec_stmt
, slp_node
, NULL
);
11069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
/* In case of interleaving, the whole chain is vectorized when the
   last store in the chain is reached.  Store stmts before the last
   one are skipped, and their stmt_vec_info shouldn't be freed
   meanwhile.  */
11075 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11076 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11083 case condition_vec_info_type
:
11084 done
= vectorizable_condition (vinfo
, stmt_info
,
11085 gsi
, &vec_stmt
, slp_node
, NULL
);
11089 case comparison_vec_info_type
:
11090 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11095 case call_vec_info_type
:
11096 done
= vectorizable_call (vinfo
, stmt_info
,
11097 gsi
, &vec_stmt
, slp_node
, NULL
);
11100 case call_simd_clone_vec_info_type
:
11101 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11105 case reduc_vec_info_type
:
11106 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11107 gsi
, &vec_stmt
, slp_node
);
11111 case cycle_phi_info_type
:
11112 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11113 &vec_stmt
, slp_node
, slp_node_instance
);
11117 case lc_phi_info_type
:
11118 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11119 stmt_info
, &vec_stmt
, slp_node
);
11123 case phi_info_type
:
11124 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11129 if (!STMT_VINFO_LIVE_P (stmt_info
))
11131 if (dump_enabled_p ())
11132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11133 "stmt not supported.\n");
11134 gcc_unreachable ();
11139 if (!slp_node
&& vec_stmt
)
11140 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11142 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
/* Handle stmts whose DEF is used outside the loop-nest that is
   being vectorized.  */
11146 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11147 slp_node_instance
, true, NULL
);
11152 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such types.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
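/* For instance (illustrative, target-dependent): with a 128-bit vector
   PREVAILING_MODE, SCALAR_TYPE of "int" and NUNITS of 4 this would
   normally yield a 4 x int vector type, while NUNITS == 0 lets the target
   pick its preferred number of elements.  */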
11192 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11193 tree scalar_type
, poly_uint64 nunits
)
11195 tree orig_scalar_type
= scalar_type
;
11196 scalar_mode inner_mode
;
11197 machine_mode simd_mode
;
11200 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11201 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11204 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* For vector types of elements whose mode precision doesn't
   match their type's precision we use an element type of mode
   precision.  The vectorization routines will have to make sure
   they support the proper result truncation/extension.
   We also make sure to build vector types with INTEGER_TYPE
   component type only.  */
11212 if (INTEGRAL_TYPE_P (scalar_type
)
11213 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11214 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11215 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11216 TYPE_UNSIGNED (scalar_type
));
/* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
   When the component mode passes the above test simply use a type
   corresponding to that mode.  The theory is that any use that
   would cause problems with this will disable vectorization anyway.  */
11222 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11223 && !INTEGRAL_TYPE_P (scalar_type
))
11224 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
/* We can't build a vector type of elements with alignment bigger than
   their size.  */
11228 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11229 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11230 TYPE_UNSIGNED (scalar_type
));
/* If we fell back to using the mode, fail if there was
   no scalar type for it.  */
11234 if (scalar_type
== NULL_TREE
)
/* If no prevailing mode was supplied, use the mode the target prefers.
   Otherwise look up a vector mode based on the prevailing mode.  */
11239 if (prevailing_mode
== VOIDmode
)
11241 gcc_assert (known_eq (nunits
, 0U));
11242 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11243 if (SCALAR_INT_MODE_P (simd_mode
))
/* Traditional behavior is not to take the integer mode
   literally, but simply to use it as a way of determining
   the vector size.  It is up to mode_for_vector to decide
   what the TYPE_MODE should be.

   Note that nunits == 1 is allowed in order to support single
   element vector types.  */
11252 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11253 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11257 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11258 || !related_vector_mode (prevailing_mode
,
11259 inner_mode
, nunits
).exists (&simd_mode
))
/* Fall back to using mode_for_vector, mostly in the hope of being
   able to use an integer mode.  */
11263 if (known_eq (nunits
, 0U)
11264 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11267 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11271 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
/* In cases where the mode was chosen by mode_for_vector, check that
   the target actually supports the chosen mode, or that it at least
   allows the vector mode to be replaced by a like-sized integer.  */
11276 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11277 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Re-attach the address-space qualifier if we canonicalized the scalar
   type.  */
11282 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11283 return build_qualified_type
11284 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
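/* For example, during BB vectorization with GROUP_SIZE of 4 the returned
   vector type has at most 4 elements even if the target would prefer a
   wider vector; with GROUP_SIZE of 0 the target's natural choice is used
   unchanged.  */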
11297 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11298 unsigned int group_size
)
/* For BB vectorization, we should always have a group size once we've
   constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   are tentative requests during things like early data reference
   analysis and pattern recognition.  */
11304 if (is_a
<bb_vec_info
> (vinfo
))
11305 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11309 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11311 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11312 vinfo
->vector_mode
= TYPE_MODE (vectype
);
/* Register the natural choice of vector type, before the group size
   has been applied.  */
11317 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
/* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
   try again with an explicit number of elements.  */
11323 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
/* Start with the biggest number of units that fits within
   GROUP_SIZE and halve it until we find a valid vector type.
   Usually either the first attempt will succeed or all will
   fail (in the latter case because GROUP_SIZE is too small
   for the target), but it's possible that a target could have
   a hole between supported vector types.

   If GROUP_SIZE is not a power of 2, this has the effect of
   trying the largest power of 2 that fits within the group,
   even though the group is not a multiple of that vector size.
   The BB vectorizer will then try to carve up the group into
   smaller pieces.  */
11337 unsigned int nunits
= 1 << floor_log2 (group_size
);
11340 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11341 scalar_type
, nunits
);
11344 while (nunits
> 1 && !vectype
);
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL_TREE;

  return truth_type_for (vectype);
}
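/* Note: via truth_type_for, the returned mask type is the boolean vector
   type with one element per element of the data vector, i.e. the type of
   the result of comparing two such data vectors.  */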
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
}
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
11438 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11439 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11441 if (def_stmt_info_out
)
11442 *def_stmt_info_out
= NULL
;
11444 *def_stmt_out
= NULL
;
11445 *dt
= vect_unknown_def_type
;
11447 if (dump_enabled_p ())
11449 dump_printf_loc (MSG_NOTE
, vect_location
,
11450 "vect_is_simple_use: operand ");
11451 if (TREE_CODE (operand
) == SSA_NAME
11452 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11453 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11455 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11458 if (CONSTANT_CLASS_P (operand
))
11459 *dt
= vect_constant_def
;
11460 else if (is_gimple_min_invariant (operand
))
11461 *dt
= vect_external_def
;
11462 else if (TREE_CODE (operand
) != SSA_NAME
)
11463 *dt
= vect_unknown_def_type
;
11464 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11465 *dt
= vect_external_def
;
11468 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11469 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11471 *dt
= vect_external_def
;
11474 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11475 def_stmt
= stmt_vinfo
->stmt
;
11476 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11477 if (def_stmt_info_out
)
11478 *def_stmt_info_out
= stmt_vinfo
;
11481 *def_stmt_out
= def_stmt
;
11484 if (dump_enabled_p ())
11486 dump_printf (MSG_NOTE
, ", type of def: ");
11489 case vect_uninitialized_def
:
11490 dump_printf (MSG_NOTE
, "uninitialized\n");
11492 case vect_constant_def
:
11493 dump_printf (MSG_NOTE
, "constant\n");
11495 case vect_external_def
:
11496 dump_printf (MSG_NOTE
, "external\n");
11498 case vect_internal_def
:
11499 dump_printf (MSG_NOTE
, "internal\n");
11501 case vect_induction_def
:
11502 dump_printf (MSG_NOTE
, "induction\n");
11504 case vect_reduction_def
:
11505 dump_printf (MSG_NOTE
, "reduction\n");
11507 case vect_double_reduction_def
:
11508 dump_printf (MSG_NOTE
, "double reduction\n");
11510 case vect_nested_cycle
:
11511 dump_printf (MSG_NOTE
, "nested cycle\n");
11513 case vect_unknown_def_type
:
11514 dump_printf (MSG_NOTE
, "unknown\n");
11519 if (*dt
== vect_unknown_def_type
)
11521 if (dump_enabled_p ())
11522 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11523 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   operand.  */
11540 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11541 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11542 gimple
**def_stmt_out
)
11544 stmt_vec_info def_stmt_info
;
11546 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11550 *def_stmt_out
= def_stmt
;
11551 if (def_stmt_info_out
)
11552 *def_stmt_info_out
= def_stmt_info
;
11554 /* Now get a vector type if the def is internal, otherwise supply
11555 NULL_TREE and leave it up to the caller to figure out a proper
11556 type for the use stmt. */
11557 if (*dt
== vect_internal_def
11558 || *dt
== vect_induction_def
11559 || *dt
== vect_reduction_def
11560 || *dt
== vect_double_reduction_def
11561 || *dt
== vect_nested_cycle
)
11563 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11564 gcc_assert (*vectype
!= NULL_TREE
);
11565 if (dump_enabled_p ())
11566 dump_printf_loc (MSG_NOTE
, vect_location
,
11567 "vect_is_simple_use: vectype %T\n", *vectype
);
11569 else if (*dt
== vect_uninitialized_def
11570 || *dt
== vect_constant_def
11571 || *dt
== vect_external_def
)
11572 *vectype
= NULL_TREE
;
11574 gcc_unreachable ();
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */
bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	}
      else
	{
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
	  return true;
	}
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  if (gimple_assign_rhs_code (ass) == COND_EXPR
	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    {
	      if (operand < 2)
		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	      else
		*op = gimple_op (ass, operand);
	    }
	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	  else
	    *op = gimple_op (ass, operand + 1);
	}
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */
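/* Illustrative usage sketch (added comment, not from the original source;
   the variable names are hypothetical): a vectorizable_* routine that has
   analyzed its SLP operands would typically finish its analysis with

     if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype))
       return false;

   so that external or constant SLP children end up agreeing on a single
   vector type.  */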
bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
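/* Worked example (added for clarity; the vector modes are illustrative and
   target dependent): widening  signed char -> int  with 128-bit vectors
   cannot be done with a single unpack, so with CODE1/CODE2 being
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR the conversion proceeds as

     V16QI  --unpack lo/hi-->  2 x V8HI  --unpack lo/hi-->  4 x V4SI

   i.e. MULTI_STEP_CVT == 1 and INTERM_TYPES holds the short vector type.  */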
bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such an example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
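/* Worked example (added for clarity; the vector modes are illustrative and
   target dependent): narrowing  int -> signed char  with 128-bit vectors
   needs more than one pack, so with CODE1 being VEC_PACK_TRUNC_EXPR the
   conversion proceeds as

     4 x V4SI  --pack-->  2 x V8HI  --pack-->  V16QI

   i.e. MULTI_STEP_CVT == 1 and INTERM_TYPES holds the short vector type.  */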
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */
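/* Worked example (added for clarity; the mask length is illustrative):
   with START_INDEX = 13, END_INDEX = 16 and an 8-element mask, the
   condition I + 13 < 16 holds only for I = 0, 1, 2, so the generated
   IFN_WHILE_ULT call produces the mask {1, 1, 1, 0, 0, 0, 0, 0}.  */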
tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
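/* Example (added for illustration; the modes depend on the target's vector
   size): for a conversion statement  int_res = (int) short_var  with
   128-bit vectors the statement's own vectype would be V4SI, but the
   smallest scalar type involved is short, so *NUNITS_VECTYPE_OUT would be
   V8HI: the vectorization factor has to cover 8 elements per vector
   iteration rather than 4.  */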
opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
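/* Worked example (added for clarity; the values are illustrative): with
   START_INDEX = 28, END_INDEX = 32 and LEN_LIMIT = 16 the sequence computes
   min_of_start_and_end = 28, left_len = 32 - 28 = 4 and LEN = min (4, 16)
   = 4, so only four lanes remain active in the final iteration; for a full
   iteration (say START_INDEX = 0) LEN saturates at LEN_LIMIT.  */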
gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);