1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 if (is_gimple_debug (gsi_stmt (si
)))
641 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
,
644 "init: stmt relevant? %G", stmt_info
->stmt
);
646 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
647 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
651 /* 2. Process_worklist */
652 while (worklist
.length () > 0)
657 stmt_vec_info stmt_vinfo
= worklist
.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE
, vect_location
,
660 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
680 case vect_reduction_def
:
681 gcc_assert (relevant
!= vect_unused_in_scope
);
682 if (relevant
!= vect_unused_in_scope
683 && relevant
!= vect_used_in_scope
684 && relevant
!= vect_used_by_reduction
685 && relevant
!= vect_used_only_live
)
686 return opt_result::failure_at
687 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
690 case vect_nested_cycle
:
691 if (relevant
!= vect_unused_in_scope
692 && relevant
!= vect_used_in_outer_by_reduction
693 && relevant
!= vect_used_in_outer
)
694 return opt_result::failure_at
695 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
698 case vect_double_reduction_def
:
699 if (relevant
!= vect_unused_in_scope
700 && relevant
!= vect_used_by_reduction
701 && relevant
!= vect_used_only_live
)
702 return opt_result::failure_at
703 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
710 if (is_pattern_stmt_p (stmt_vinfo
))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
717 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
718 tree op
= gimple_assign_rhs1 (assign
);
721 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
724 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
725 loop_vinfo
, relevant
, &worklist
, false);
728 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
729 loop_vinfo
, relevant
, &worklist
, false);
734 for (; i
< gimple_num_ops (assign
); i
++)
736 op
= gimple_op (assign
, i
);
737 if (TREE_CODE (op
) == SSA_NAME
)
740 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
747 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
749 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
751 tree arg
= gimple_call_arg (call
, i
);
753 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
765 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
773 gather_scatter_info gs_info
;
774 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
777 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (vec_info
*,
799 stmt_vec_info stmt_info
, int ncopies
,
800 enum vect_def_type
*dt
,
803 stmt_vector_for_cost
*cost_vec
,
804 vect_cost_for_stmt kind
= vector_stmt
)
806 int inside_cost
= 0, prologue_cost
= 0;
808 gcc_assert (cost_vec
!= NULL
);
810 /* ??? Somehow we need to fix this at the callers. */
812 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
818 for (int i
= 0; i
< ndts
; i
++)
819 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
820 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
821 stmt_info
, 0, vect_prologue
);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
825 stmt_info
, 0, vect_body
);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE
, vect_location
,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
844 enum vect_def_type
*dt
,
845 unsigned int ncopies
, int pwr
,
846 stmt_vector_for_cost
*cost_vec
,
850 int inside_cost
= 0, prologue_cost
= 0;
852 for (i
= 0; i
< pwr
+ 1; i
++)
854 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
,
856 ? vector_stmt
: vec_promote_demote
,
857 stmt_info
, 0, vect_body
);
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i
= 0; i
< 2; i
++)
863 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
864 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
865 stmt_info
, 0, vect_prologue
);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE
, vect_location
,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
873 /* Returns true if the current function returns DECL. */
876 cfun_returns (tree decl
)
880 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
882 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
885 if (gimple_return_retval (ret
) == decl
)
887 /* We often end up with an aggregate copy to the result decl,
888 handle that case as well. First skip intermediate clobbers
893 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
895 while (gimple_clobber_p (def
));
896 if (is_a
<gassign
*> (def
)
897 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
898 && gimple_assign_rhs1 (def
) == decl
)
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
910 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
911 vect_memory_access_type memory_access_type
,
912 dr_alignment_support alignment_support_scheme
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, alignment_support_scheme
,
975 misalignment
, &inside_cost
, cost_vec
);
977 if (memory_access_type
== VMAT_ELEMENTWISE
978 || memory_access_type
== VMAT_STRIDED_SLP
)
980 /* N scalar stores plus extracting the elements. */
981 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
982 inside_cost
+= record_stmt_cost (cost_vec
,
983 ncopies
* assumed_nunits
,
984 vec_to_scalar
, stmt_info
, 0, vect_body
);
987 /* When vectorizing a store into the function result assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up with having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
993 && (TREE_CODE (base
) == RESULT_DECL
994 || (DECL_P (base
) && cfun_returns (base
)))
995 && !aggregate_value_p (base
, cfun
->decl
))
997 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
1001 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1007 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1009 stmt_info
, 0, vect_epilogue
);
1011 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1013 stmt_info
, 0, vect_epilogue
);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE
, vect_location
,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1025 /* Calculate cost of DR's memory access. */
1027 vect_get_store_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1028 dr_alignment_support alignment_support_scheme
,
1030 unsigned int *inside_cost
,
1031 stmt_vector_for_cost
*body_cost_vec
)
1033 switch (alignment_support_scheme
)
1037 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1038 vector_store
, stmt_info
, 0,
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE
, vect_location
,
1043 "vect_model_store_cost: aligned.\n");
1047 case dr_unaligned_supported
:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1051 unaligned_store
, stmt_info
,
1052 misalignment
, vect_body
);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE
, vect_location
,
1055 "vect_model_store_cost: unaligned supported by "
1060 case dr_unaligned_unsupported
:
1062 *inside_cost
= VECT_MAX_COST
;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1066 "vect_model_store_cost: unsupported access.\n");
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
/* NOTE (review): the text of this function is corrupted in this copy --
   statements are split mid-token across lines and the embedded original
   line numbers show gaps where braces, keywords and whole statements are
   missing.  No code token below has been altered; restore this block from
   the canonical source before making functional edits.  */
/* Per the surviving header comment above: models the cost of vectorized
   loads, attributing once-per-group overhead to one access and accounting
   for the chosen alignment/access scheme.  Costs are accumulated into
   inside_cost/prologue_cost via record_stmt_cost and vect_get_load_cost.  */
1084 vect_model_load_cost (vec_info
*vinfo
,
1085 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1086 vect_memory_access_type memory_access_type
,
1087 dr_alignment_support alignment_support_scheme
,
1089 gather_scatter_info
*gs_info
,
1091 stmt_vector_for_cost
*cost_vec
)
1093 unsigned int inside_cost
= 0, prologue_cost
= 0;
1094 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1096 gcc_assert (cost_vec
);
1098 /* ??? Somehow we need to fix this at the callers. */
1100 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1102 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms
, n_loads
;
1109 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1110 vf
, true, &n_perms
, &n_loads
);
1111 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1112 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info
= stmt_info
;
1122 if (!slp_node
&& grouped_access_p
)
1123 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p
&& !slp_node
&& memory_access_type
== VMAT_LOAD_STORE_LANES
)
1134 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
1135 stmt_vec_info next_stmt_info
= first_stmt_info
;
1139 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
1141 while (next_stmt_info
);
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE
, vect_location
,
1146 "vect_model_load_cost: %d unused vectors.\n",
1148 vect_get_load_cost (vinfo
, stmt_info
, ncopies
* gaps
,
1149 alignment_support_scheme
, misalignment
, false,
1150 &inside_cost
, &prologue_cost
,
1151 cost_vec
, cost_vec
, true);
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1160 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1162 /* Uses an even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1165 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1166 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1167 stmt_info
, 0, vect_body
);
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE
, vect_location
,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1175 /* The loads themselves. */
1176 if (memory_access_type
== VMAT_ELEMENTWISE
1177 || memory_access_type
== VMAT_GATHER_SCATTER
)
1179 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1180 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1181 if (memory_access_type
== VMAT_GATHER_SCATTER
1182 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1186 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
* assumed_nunits
,
1187 vec_to_scalar
, stmt_info
, 0,
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost
+= record_stmt_cost (cost_vec
,
1191 ncopies
* assumed_nunits
,
1192 scalar_load
, stmt_info
, 0, vect_body
);
1194 else if (memory_access_type
== VMAT_INVARIANT
)
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1198 scalar_load
, stmt_info
, 0,
1200 prologue_cost
+= record_stmt_cost (cost_vec
, 1,
1201 scalar_to_vec
, stmt_info
, 0,
1205 vect_get_load_cost (vinfo
, stmt_info
, ncopies
,
1206 alignment_support_scheme
, misalignment
, first_stmt_p
,
1207 &inside_cost
, &prologue_cost
,
1208 cost_vec
, cost_vec
, true);
1209 if (memory_access_type
== VMAT_ELEMENTWISE
1210 || memory_access_type
== VMAT_STRIDED_SLP
1211 || (memory_access_type
== VMAT_GATHER_SCATTER
1212 && gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
))
1213 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1214 stmt_info
, 0, vect_body
);
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE
, vect_location
,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1223 /* Calculate cost of DR's memory access. */
/* NOTE (review): corrupted extraction -- lines are split mid-statement and
   the embedded original numbering has gaps (missing `case dr_aligned:`,
   `break;` and brace lines, among others).  Code tokens are preserved
   byte-identically below; restore from the canonical source before editing.  */
/* Dispatches on ALIGNMENT_SUPPORT_SCHEME to accumulate the body cost of a
   vectorized load into *inside_cost (and realignment setup into
   *prologue_cost), recording each contribution via record_stmt_cost.
   dr_unaligned_unsupported pins *inside_cost to VECT_MAX_COST.  */
1225 vect_get_load_cost (vec_info
*, stmt_vec_info stmt_info
, int ncopies
,
1226 dr_alignment_support alignment_support_scheme
,
1228 bool add_realign_cost
, unsigned int *inside_cost
,
1229 unsigned int *prologue_cost
,
1230 stmt_vector_for_cost
*prologue_cost_vec
,
1231 stmt_vector_for_cost
*body_cost_vec
,
1232 bool record_prologue_costs
)
1234 switch (alignment_support_scheme
)
1238 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1239 stmt_info
, 0, vect_body
);
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE
, vect_location
,
1243 "vect_model_load_cost: aligned.\n");
1247 case dr_unaligned_supported
:
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1251 unaligned_load
, stmt_info
,
1252 misalignment
, vect_body
);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE
, vect_location
,
1256 "vect_model_load_cost: unaligned supported by "
1261 case dr_explicit_realign
:
1263 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1264 vector_load
, stmt_info
, 0, vect_body
);
1265 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1266 vec_perm
, stmt_info
, 0, vect_body
);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1271 if (targetm
.vectorize
.builtin_mask_for_load
)
1272 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1273 stmt_info
, 0, vect_body
);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE
, vect_location
,
1277 "vect_model_load_cost: explicit realign\n");
1281 case dr_explicit_realign_optimized
:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "vect_model_load_cost: unaligned software "
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost
&& record_prologue_costs
)
1297 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1298 vector_stmt
, stmt_info
,
1300 if (targetm
.vectorize
.builtin_mask_for_load
)
1301 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1302 vector_stmt
, stmt_info
,
1306 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1307 stmt_info
, 0, vect_body
);
1308 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1309 stmt_info
, 0, vect_body
);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE
, vect_location
,
1313 "vect_model_load_cost: explicit realign optimized"
1319 case dr_unaligned_unsupported
:
1321 *inside_cost
= VECT_MAX_COST
;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1325 "vect_model_load_cost: unsupported access.\n");
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
/* NOTE (review): corrupted extraction -- the `if (gsi)` / `else` lines that
   selected between the two insertion paths are missing (gaps at original
   lines 1340-1345).  Tokens preserved byte-identically; restore from the
   canonical source before editing.  */
/* Inserts NEW_STMT for STMT_VINFO either via vect_finish_stmt_generation
   (at a given iterator) or via vinfo->insert_on_entry (loop preheader),
   then dumps the created init stmt when dumping is enabled.  */
1338 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1339 gimple_stmt_iterator
*gsi
)
1342 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
1344 vinfo
->insert_on_entry (stmt_vinfo
, new_stmt
);
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE
, vect_location
,
1348 "created new init_stmt: %G", new_stmt
);
1351 /* Function vect_init_vector.
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1355 vector type a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
/* NOTE (review): corrupted extraction -- braces, `else` arms, local
   declarations (e.g. of new_temp/init_stmt) and the return statement are
   missing per the gaps in the embedded numbering.  Tokens preserved
   byte-identically; restore from the canonical source before editing.  */
/* Per the surviving header comment above: builds an INIT_STMT that
   initializes a new variable of TYPE with VAL, splatting scalar VAL into a
   vector when needed (with special all-ones/all-zeros handling for vector
   boolean types), placing it at GSI or in the loop preheader via
   vect_init_vector_1, and returning the def of INIT_STMT.  */
1362 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1363 gimple_stmt_iterator
*gsi
)
1368 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1369 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1371 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1372 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1374 /* Scalar boolean value should be transformed into
1375 all zeros or all ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type
))
1378 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1379 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1381 if (CONSTANT_CLASS_P (val
))
1382 val
= integer_zerop (val
) ? false_val
: true_val
;
1385 new_temp
= make_ssa_name (TREE_TYPE (type
));
1386 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1387 val
, true_val
, false_val
);
1388 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1394 gimple_seq stmts
= NULL
;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1396 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1397 TREE_TYPE (type
), val
);
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1402 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1403 !gsi_end_p (gsi2
); )
1405 init_stmt
= gsi_stmt (gsi2
);
1406 gsi_remove (&gsi2
, false);
1407 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1411 val
= build_vector_from_val (type
, val
);
1414 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1415 init_stmt
= gimple_build_assign (new_temp
, val
);
1416 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
1421 /* Function vect_get_vec_defs_for_operand.
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
/* NOTE (review): corrupted extraction -- the `ncopies` parameter line,
   declarations of is_simple_use/def_stmt/vector_type, braces and some
   `else` lines are missing per the numbering gaps.  Tokens preserved
   byte-identically; restore from the canonical source before editing.  */
/* Per the surviving header comment above: pushes NCOPIES vector defs for
   operand OP of STMT_VINFO into VEC_OPRNDS -- creating a vector invariant
   via vect_init_vector for constant/external defs (VECTYPE, when given,
   overrides the computed vector type), otherwise reusing the lhs of the
   defining statement's STMT_VINFO_VEC_STMTS.  */
1434 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1436 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1439 enum vect_def_type dt
;
1441 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE
, vect_location
,
1445 "vect_get_vec_defs_for_operand: %T\n", op
);
1447 stmt_vec_info def_stmt_info
;
1448 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1449 &def_stmt_info
, &def_stmt
);
1450 gcc_assert (is_simple_use
);
1451 if (def_stmt
&& dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1454 vec_oprnds
->create (ncopies
);
1455 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1457 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1461 vector_type
= vectype
;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1464 vector_type
= truth_type_for (stmt_vectype
);
1466 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1468 gcc_assert (vector_type
);
1469 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1471 vec_oprnds
->quick_push (vop
);
1475 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1477 for (unsigned i
= 0; i
< ncopies
; ++i
)
1478 vec_oprnds
->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
1484 /* Get vectorized definitions for OP0 and OP1. */
/* NOTE (review): corrupted extraction -- the `unsigned ncopies` parameter
   line, the `if (slp_node)` / `else` split and the per-operand guard
   conditions (presumably `if (op0)` etc. -- confirm against the canonical
   source) are missing.  Tokens preserved byte-identically.  */
/* Collects vectorized definitions for up to four operands: from the SLP
   node's children via vect_get_slp_defs when vectorizing an SLP node,
   otherwise per operand via vect_get_vec_defs_for_operand with the given
   vectypes.  */
1487 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1489 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1490 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1491 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1492 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
1508 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1509 op0
, vec_oprnds0
, vectype0
);
1511 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1512 op1
, vec_oprnds1
, vectype1
);
1514 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1515 op2
, vec_oprnds2
, vectype2
);
1517 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1518 op3
, vec_oprnds3
, vectype3
);
/* NOTE (review): corrupted extraction -- the `unsigned ncopies` parameter
   line and surrounding braces are missing.  Tokens preserved
   byte-identically.  */
/* Convenience overload: forwards to the typed vect_get_vec_defs with
   NULL_TREE for every vectype.  */
1523 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1525 tree op0
, vec
<tree
> *vec_oprnds0
,
1526 tree op1
, vec
<tree
> *vec_oprnds1
,
1527 tree op2
, vec
<tree
> *vec_oprnds2
,
1528 tree op3
, vec
<tree
> *vec_oprnds3
)
1530 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1531 op0
, vec_oprnds0
, NULL_TREE
,
1532 op1
, vec_oprnds1
, NULL_TREE
,
1533 op2
, vec_oprnds2
, NULL_TREE
,
1534 op3
, vec_oprnds3
, NULL_TREE
);
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
1539 statement and create and return a stmt_vec_info for it. */
/* NOTE (review): corrupted extraction -- the guard around the location
   copy and the `else` before the final assert are missing per the
   numbering gaps.  Tokens preserved byte-identically.  */
/* Shared tail of vect_finish_replace_stmt/vect_finish_stmt_generation:
   dumps VEC_STMT, copies STMT_INFO's location onto it, and keeps the new
   stmt in the same EH landing-pad region as the scalar stmt when it could
   throw (otherwise asserts it cannot throw).  */
1542 vect_finish_stmt_generation_1 (vec_info
*,
1543 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1550 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1556 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1557 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1560 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564 which sets the same scalar result as STMT_INFO did. Create and return a
1565 stmt_vec_info for VEC_STMT. */
/* NOTE (review): corrupted extraction (brace lines missing); tokens
   preserved byte-identically.  */
/* Replaces the scalar stmt underlying STMT_INFO with VEC_STMT in place
   (asserting both set the same lhs), then runs the shared finishing step
   vect_finish_stmt_generation_1.  */
1568 vect_finish_replace_stmt (vec_info
*vinfo
,
1569 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1571 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
1572 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1574 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1575 gsi_replace (&gsi
, vec_stmt
, true);
1577 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
/* NOTE (review): corrupted extraction -- brace lines are missing per the
   numbering gaps, so the exact nesting of the virtual-operand update must
   be confirmed against the canonical source.  Tokens preserved
   byte-identically.  */
/* Inserts VEC_STMT before *GSI for STMT_INFO.  When the insertion point
   has an SSA vuse and VEC_STMT is a store (gimple assign to a non-register
   lhs, or a call with VOPs), it threads a fresh vdef through VEC_STMT and
   repoints the following stmt's vuse, avoiding a virtual-SSA rename.
   Finishes via vect_finish_stmt_generation_1.  */
1584 vect_finish_stmt_generation (vec_info
*vinfo
,
1585 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1586 gimple_stmt_iterator
*gsi
)
1588 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1590 if (!gsi_end_p (*gsi
)
1591 && gimple_has_mem_ops (vec_stmt
))
1593 gimple
*at_stmt
= gsi_stmt (*gsi
);
1594 tree vuse
= gimple_vuse (at_stmt
);
1595 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1597 tree vdef
= gimple_vdef (at_stmt
);
1598 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1599 gimple_set_modified (vec_stmt
, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1605 && ((is_gimple_assign (vec_stmt
)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1607 || (is_gimple_call (vec_stmt
)
1608 && !(gimple_call_flags (vec_stmt
)
1609 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1611 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1612 gimple_set_vdef (vec_stmt
, new_vdef
);
1613 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1617 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1618 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
/* NOTE (review): corrupted extraction -- the declaration of `ifn`, the
   `else` arm, the return statements (presumably `return ifn;` /
   `return IFN_LAST;` -- confirm against the canonical source) and the
   closing braces are missing.  Tokens preserved byte-identically.  */
/* Per the surviving header comment above: checks whether the call to CFN
   (or the function FNDECL) with output VECTYPE_OUT and inputs VECTYPE_IN
   can be implemented as a directly-supported internal function.  */
1627 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1628 tree vectype_out
, tree vectype_in
)
1631 if (internal_fn_p (cfn
))
1632 ifn
= as_internal_fn (cfn
);
1634 ifn
= associated_internal_fn (fndecl
);
1635 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1637 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1638 if (info
.vectorizable
)
1640 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1641 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1642 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1643 OPTIMIZE_FOR_SPEED
))
1651 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1652 gimple_stmt_iterator
*);
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
/* NOTE (review): corrupted extraction -- several parameter lines
   (group_size, scalar_mask, the memory_access_type name), `return`
   statements, the `if (is_load ... )` head of the lanes check, and many
   brace/else lines are missing per the numbering gaps.  Tokens preserved
   byte-identically; restore from the canonical source before editing.  */
/* Per the surviving header comment above: decides whether this load/store
   can be vectorized with partial vectors.  Records the required rgroup
   masks (vect_record_loop_mask) or lengths (vect_record_loop_len) when
   supported; otherwise clears LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with a
   missed-optimization dump explaining why.  */
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo
, tree vectype
,
1673 vec_load_store_type vls_type
,
1675 vect_memory_access_type
1677 unsigned int ncopies
,
1678 gather_scatter_info
*gs_info
,
1681 /* Invariant loads need no special support. */
1682 if (memory_access_type
== VMAT_INVARIANT
)
1685 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1686 machine_mode vecmode
= TYPE_MODE (vectype
);
1687 bool is_load
= (vls_type
== VLS_LOAD
);
1688 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1691 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1692 : !vect_store_lanes_supported (vectype
, group_size
, true))
1694 if (dump_enabled_p ())
1695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1696 "can't operate on partial vectors because"
1697 " the target doesn't have an appropriate"
1698 " load/store-lanes instruction.\n");
1699 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1702 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1706 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1708 internal_fn ifn
= (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE
);
1711 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1712 gs_info
->memory_type
,
1713 gs_info
->offset_vectype
,
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1724 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
1728 if (memory_access_type
!= VMAT_CONTIGUOUS
1729 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1731 /* Element X of the data must come from iteration i * VF + X of the
1732 scalar loop. We need more work to support other mappings. */
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1735 "can't operate on partial vectors because an"
1736 " access isn't contiguous.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1741 if (!VECTOR_MODE_P (vecmode
))
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1745 "can't operate on partial vectors when emulating"
1746 " vector operations.\n");
1747 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1751 /* We might load more scalars than we need for permuting SLP loads.
1752 We checked in get_group_load_store_type that the extra elements
1753 don't leak into a new vector. */
1754 auto get_valid_nvectors
= [] (poly_uint64 size
, poly_uint64 nunits
)
1756 unsigned int nvectors
;
1757 if (can_div_away_from_zero_p (size
, nunits
, &nvectors
))
1762 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1763 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1764 machine_mode mask_mode
;
1765 bool using_partial_vectors_p
= false;
1766 if (targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1767 && can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1769 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1770 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
1771 using_partial_vectors_p
= true;
1775 if (get_len_load_store_mode (vecmode
, is_load
).exists (&vmode
))
1777 unsigned int nvectors
= get_valid_nvectors (group_size
* vf
, nunits
);
1778 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
1779 unsigned factor
= (vecmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vecmode
);
1780 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype
, factor
);
1781 using_partial_vectors_p
= true;
1784 if (!using_partial_vectors_p
)
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1788 "can't operate on partial vectors because the"
1789 " target doesn't have the appropriate partial"
1790 " vectorization load or store.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1799 otherwise return VEC_MASK & LOOP_MASK.
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
/* NOTE (review): corrupted extraction -- the early `return vec_mask;`
   paths and the final return of the AND result are missing per the
   numbering gaps.  Tokens preserved byte-identically.  */
/* Per the surviving header comment above: returns the mask to apply to a
   masked load/store -- VEC_MASK itself when LOOP_MASK is null or the pair
   was already recorded in vec_cond_masked_set, otherwise builds and
   inserts VEC_MASK & LOOP_MASK before GSI.  */
1805 prepare_vec_mask (loop_vec_info loop_vinfo
, tree mask_type
, tree loop_mask
,
1806 tree vec_mask
, gimple_stmt_iterator
*gsi
)
1808 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1812 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1814 if (loop_vinfo
->vec_cond_masked_set
.contains ({ vec_mask
, loop_mask
}))
1817 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1818 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1819 vec_mask
, loop_mask
);
1821 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1825 /* Determine whether we can use a gather load or scatter store to vectorize
1826 strided load or store STMT_INFO by truncating the current offset to a
1827 smaller width. We need to be able to construct an offset vector:
1829 { 0, X, X*2, X*3, ... }
1831 without loss of precision, where X is STMT_INFO's DR_STEP.
1833 Return true if this is possible, describing the gather load or scatter
1834 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
/* NOTE (review): corrupted extraction -- `return false;`/`return true;`
   statements, the `continue;` lines inside the scale loop, the declaration
   of `factor`, and brace lines are missing per the numbering gaps.  Tokens
   preserved byte-identically; restore from the canonical source before
   editing.  */
/* Per the surviving header comment above: tries to implement a strided
   access as a gather/scatter by truncating the constant DR_STEP-based
   offset vector { 0, X, 2X, ... } to a narrower integer type without
   losing precision, filling in *GS_INFO on success.  */
1837 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1838 loop_vec_info loop_vinfo
, bool masked_p
,
1839 gather_scatter_info
*gs_info
)
1841 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1842 data_reference
*dr
= dr_info
->dr
;
1843 tree step
= DR_STEP (dr
);
1844 if (TREE_CODE (step
) != INTEGER_CST
)
1846 /* ??? Perhaps we could use range information here? */
1847 if (dump_enabled_p ())
1848 dump_printf_loc (MSG_NOTE
, vect_location
,
1849 "cannot truncate variable step.\n");
1853 /* Get the number of bits in an element. */
1854 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1855 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1856 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1858 /* Set COUNT to the upper limit on the number of elements - 1.
1859 Start with the maximum vectorization factor. */
1860 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1862 /* Try lowering COUNT to the number of scalar latch iterations. */
1863 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1864 widest_int max_iters
;
1865 if (max_loop_iterations (loop
, &max_iters
)
1866 && max_iters
< count
)
1867 count
= max_iters
.to_shwi ();
1869 /* Try scales of 1 and the element size. */
1870 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1871 wi::overflow_type overflow
= wi::OVF_NONE
;
1872 for (int i
= 0; i
< 2; ++i
)
1874 int scale
= scales
[i
];
1876 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1879 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1880 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1883 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1884 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1886 /* Find the narrowest viable offset type. */
1887 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1888 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1891 /* See whether the target supports the operation with an offset
1892 no narrower than OFFSET_TYPE. */
1893 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1894 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1895 vectype
, memory_type
, offset_type
, scale
,
1896 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1897 || gs_info
->ifn
== IFN_LAST
)
1900 gs_info
->decl
= NULL_TREE
;
1901 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1902 but we don't need to store that here. */
1903 gs_info
->base
= NULL_TREE
;
1904 gs_info
->element_type
= TREE_TYPE (vectype
);
1905 gs_info
->offset
= fold_convert (offset_type
, step
);
1906 gs_info
->offset_dt
= vect_constant_def
;
1907 gs_info
->scale
= scale
;
1908 gs_info
->memory_type
= memory_type
;
1912 if (overflow
&& dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE
, vect_location
,
1914 "truncating gather/scatter offset to %d bits"
1915 " might change its value.\n", element_bits
);
1920 /* Return true if we can use gather/scatter internal functions to
1921 vectorize STMT_INFO, which is a grouped or strided load or store.
1922 MASKED_P is true if load or store is conditional. When returning
1923 true, fill in GS_INFO with the information required to perform the
/* NOTE (review): corrupted extraction -- the trailing arguments of the
   vect_truncate_gather_scatter_offset tail call, the final `return true;`
   and brace lines are missing per the numbering gaps.  Tokens preserved
   byte-identically.  */
/* Per the surviving header comment above: returns whether STMT_INFO (a
   grouped/strided access) can use gather/scatter internal functions,
   filling *GS_INFO -- falling back to the offset-truncation path when
   vect_check_gather_scatter finds no usable IFN, and otherwise widening
   the recorded offset to the offset vector's element type.  */
1927 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1928 loop_vec_info loop_vinfo
, bool masked_p
,
1929 gather_scatter_info
*gs_info
)
1931 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1932 || gs_info
->ifn
== IFN_LAST
)
1933 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1936 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1937 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1939 gcc_assert (TYPE_PRECISION (new_offset_type
)
1940 >= TYPE_PRECISION (old_offset_type
));
1941 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_NOTE
, vect_location
,
1945 "using gather/scatter for strided/grouped access,"
1946 " scale = %d\n", gs_info
->scale
);
1951 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1952 elements with a known constant step. Return -1 if that step
1953 is negative, 0 if it is zero, and 1 if it is greater than zero. */
/* NOTE (review): corrupted extraction -- the second argument of the
   tree_int_cst_compare call (per the header comment, a zero constant) and
   the closing lines are missing.  Tokens preserved byte-identically.  */
/* Per the surviving header comment above: returns -1/0/1 as the DR's
   constant step is negative, zero or positive.  */
1956 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
1958 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1959 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
1963 /* If the target supports a permute mask that reverses the elements in
1964 a vector of type VECTYPE, return that mask, otherwise return null. */
/* NOTE (review): corrupted extraction -- the `return NULL_TREE;` for the
   unsupported case and brace lines are missing per the numbering gaps.
   Tokens preserved byte-identically.  */
/* Per the surviving header comment above: builds the element-reversal
   permute mask { n-1, n-2, ... } for VECTYPE if the target supports that
   constant permutation, otherwise yields no mask.  */
1967 perm_mask_for_reverse (tree vectype
)
1969 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1971 /* The encoding has a single stepped pattern. */
1972 vec_perm_builder
sel (nunits
, 1, 3);
1973 for (int i
= 0; i
< 3; ++i
)
1974 sel
.quick_push (nunits
- 1 - i
);
1976 vec_perm_indices
indices (sel
, 1, nunits
);
1977 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1979 return vect_gen_perm_mask_checked (vectype
, indices
);
1982 /* A subroutine of get_load_store_type, with a subset of the same
1983 arguments. Handle the case where STMT_INFO is a load or store that
1984 accesses consecutive elements with a negative step. Sets *POFFSET
1985 to the offset to be applied to the DR for the first access. */
/* NOTE (review): corrupted extraction -- the condition guarding the
   "multiple types" bail-out (gap at original lines 1995-1997) and various
   brace lines are missing.  Tokens preserved byte-identically.  */
/* Per the surviving header comment above: classifies a consecutive
   negative-step access, setting *POFFSET to the (negative) byte offset of
   the first vector access; returns VMAT_ELEMENTWISE when alignment or
   reversal support is lacking, VMAT_CONTIGUOUS_DOWN for invariant-source
   stores, else VMAT_CONTIGUOUS_REVERSE.  */
1987 static vect_memory_access_type
1988 get_negative_load_store_type (vec_info
*vinfo
,
1989 stmt_vec_info stmt_info
, tree vectype
,
1990 vec_load_store_type vls_type
,
1991 unsigned int ncopies
, poly_int64
*poffset
)
1993 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1994 dr_alignment_support alignment_support_scheme
;
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2000 "multiple types with negative step.\n");
2001 return VMAT_ELEMENTWISE
;
2004 /* For backward running DRs the first access in vectype actually is
2005 N-1 elements before the address of the DR. */
2006 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
2007 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
2009 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
2010 alignment_support_scheme
2011 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2012 if (alignment_support_scheme
!= dr_aligned
2013 && alignment_support_scheme
!= dr_unaligned_supported
)
2015 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2017 "negative step but alignment required.\n");
2019 return VMAT_ELEMENTWISE
;
2022 if (vls_type
== VLS_STORE_INVARIANT
)
2024 if (dump_enabled_p ())
2025 dump_printf_loc (MSG_NOTE
, vect_location
,
2026 "negative step with invariant source;"
2027 " no permute needed.\n");
2028 return VMAT_CONTIGUOUS_DOWN
;
2031 if (!perm_mask_for_reverse (vectype
))
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2035 "negative step and reversing not supported.\n");
2037 return VMAT_ELEMENTWISE
;
2040 return VMAT_CONTIGUOUS_REVERSE
;
2043 /* STMT_INFO is either a masked or unconditional store. Return the value
/* NOTE (review): corrupted extraction -- the final fallthrough (likely
   gcc_unreachable -- confirm against the canonical source) and brace
   lines are missing.  Tokens preserved byte-identically.  */
/* Per the surviving (truncated) header comment above: returns the stored
   value of STMT_INFO -- rhs1 of a single gimple assign, or the
   stored-value argument of an internal-function call.  */
2047 vect_get_store_rhs (stmt_vec_info stmt_info
)
2049 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2051 gcc_assert (gimple_assign_single_p (assign
));
2052 return gimple_assign_rhs1 (assign
);
2054 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2056 internal_fn ifn
= gimple_call_internal_fn (call
);
2057 int index
= internal_fn_stored_value_index (ifn
);
2058 gcc_assert (index
>= 0);
2059 return gimple_call_arg (call
, index
);
2064 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2066 This function returns a vector type which can be composed with NETLS pieces,
2067 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2068 same vector size as the return vector. It checks target whether supports
2069 pieces-size vector mode for construction firstly, if target fails to, check
2070 pieces-size scalar mode for construction further. It returns NULL_TREE if
2071 fails to find the available composition.
2073 For example, for (vtype=V16QI, nelts=4), we can probably get:
2074 - V16QI with PTYPE V4QI.
2075 - V4SI with PTYPE SI.
/* NOTE(review): lossy extraction — dropped blank/brace lines; code tokens
   below preserved byte-for-byte, comments only added.  */
2079 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
/* Sanity-check the arguments: VTYPE must be a vector type and the number
   of requested pieces must be positive.  */
2081 gcc_assert (VECTOR_TYPE_P (vtype
));
2082 gcc_assert (known_gt (nelts
, 0U));
2084 machine_mode vmode
= TYPE_MODE (vtype
)
2085 if (!VECTOR_MODE_P (vmode
))
/* Compute the bit size of each piece; bail out (via the constant_multiple_p
   guard) if the vector's bit size is not a compile-time multiple of NELTS.  */
2088 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
)
2089 unsigned int pbsize
;
2090 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2092 /* First check if vec_init optab supports construction from
2093 vector pieces directly. */
2094 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2095 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
)
2097 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2098 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2099 != CODE_FOR_nothing
))
/* Success: the piece type is a smaller vector of the same element type.  */
2101 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
)
2105 /* Otherwise check if exists an integer type of the same piece size and
2106 if vec_init optab supports construction from it directly. */
2107 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2108 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2109 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2110 != CODE_FOR_nothing
))
/* Success: the piece type is an unsigned integer of the piece bit size.  */
2112 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2113 return build_vector_type (*ptype
, nelts
)
2120 /* A subroutine of get_load_store_type, with a subset of the same
2121 arguments. Handle the case where STMT_INFO is part of a grouped load
2124 For stores, the statements in the group are all consecutive
2125 and there is no gap at the end. For loads, the statements in the
2126 group might not be consecutive; there can be gaps between statements
2127 as well as at the end. */
/* NOTE(review): lossy extraction — blank/brace/else lines dropped and
   logical lines split; the code tokens below are byte-for-byte unchanged.
   Some conditions and branches are therefore only partially visible;
   comments below only describe what the visible tokens establish.  */
2130 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2131 tree vectype
, slp_tree slp_node
,
2132 bool masked_p
, vec_load_store_type vls_type
,
2133 vect_memory_access_type
*memory_access_type
,
2134 poly_int64
*poffset
,
2135 dr_alignment_support
*alignment_support_scheme
,
2137 gather_scatter_info
*gs_info
)
/* Gather the group description: the group leader, its data reference,
   the group size and the gap recorded on the leader.  */
2139 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
)
2140 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2141 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
)
2142 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
)
2143 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
)
2144 bool single_element_p
= (stmt_info
== first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2146 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
)
2147 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
)
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p
= false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p
= (!masked_p
2156 && vls_type
== VLS_LOAD
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2169 /* For SLP vectorization we directly vectorize a subchain
2170 without permutation. */
2171 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2173 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
/* Strided SLP groups: use VMAT_STRIDED_SLP if a whole number of groups
   fills a vector, otherwise fall back to elementwise accesses.  */
2174 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2176 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2177 separated by the stride, until we have a complete vector.
2178 Fall back to scalar accesses if that isn't possible. */
2179 if (multiple_p (nunits
, group_size
))
2180 *memory_access_type
= VMAT_STRIDED_SLP
;
2182 *memory_access_type
= VMAT_ELEMENTWISE
;
2186 overrun_p
= loop_vinfo
&& gap
!= 0;
2187 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2190 "Grouped store with gaps requires"
2191 " non-consecutive accesses\n");
2194 /* An overrun is fine if the trailing elements are smaller
2195 than the alignment boundary B. Every vector access will
2196 be a multiple of B and so we are guaranteed to access a
2197 non-gap element in the same B-sized block. */
2199 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2201 / vect_get_scalar_dr_size (first_dr_info
)))
2204 /* If the gap splits the vector in half and the target
2205 can do half-vector operations avoid the epilogue peeling
2206 by simply loading half of the vector only. Usually
2207 the construction with an upper zero half will be elided. */
2208 dr_alignment_support alss
;
2209 int misalign
= dr_misalignment (first_dr_info
, vectype
)
2213 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2214 vectype
, misalign
)))
2216 || alss
== dr_unaligned_supported
)
2217 && known_eq (nunits
, (group_size
- gap
) * 2)
2218 && known_eq (nunits
, group_size
)
2219 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
/* If the selected scheme would overrun but overrun cannot be handled
   (e.g. masked access or store), report and give up.  */
2223 if (overrun_p
&& !can_overrun_p
)
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2227 "Peeling for outer loop is not supported\n");
2230 int cmp
= compare_step_with_zero (vinfo
, stmt_info
)
2233 if (single_element_p
)
2234 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2235 only correct for single element "interleaving" SLP. */
2236 *memory_access_type
= get_negative_load_store_type
2237 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
)
2240 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2241 separated by the stride, until we have a complete vector.
2242 Fall back to scalar accesses if that isn't possible. */
2243 if (multiple_p (nunits
, group_size
))
2244 *memory_access_type
= VMAT_STRIDED_SLP
;
2246 *memory_access_type
= VMAT_ELEMENTWISE
;
2251 gcc_assert (!loop_vinfo
|| cmp
> 0);
2252 *memory_access_type
= VMAT_CONTIGUOUS
;
2258 /* We can always handle this case using elementwise accesses,
2259 but see if something more efficient is available. */
2260 *memory_access_type
= VMAT_ELEMENTWISE
;
2262 /* If there is a gap at the end of the group then these optimizations
2263 would access excess elements in the last iteration. */
2264 bool would_overrun_p
= (gap
!= 0);
2265 /* An overrun is fine if the trailing elements are smaller than the
2266 alignment boundary B. Every vector access will be a multiple of B
2267 and so we are guaranteed to access a non-gap element in the
2268 same B-sized block. */
2271 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2272 / vect_get_scalar_dr_size (first_dr_info
)))
2273 would_overrun_p
= false;
2275 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2276 && (can_overrun_p
|| !would_overrun_p
)
2277 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2279 /* First cope with the degenerate case of a single-element
2281 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2284 /* Otherwise try using LOAD/STORE_LANES. */
2285 else if (vls_type
== VLS_LOAD
2286 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2287 : vect_store_lanes_supported (vectype
, group_size
,
2290 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2291 overrun_p
= would_overrun_p
;
2294 /* If that fails, try using permuting loads. */
2295 else if (vls_type
== VLS_LOAD
2296 ? vect_grouped_load_supported (vectype
, single_element_p
,
2298 : vect_grouped_store_supported (vectype
, group_size
))
2300 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2301 overrun_p
= would_overrun_p
;
2305 /* As a last resort, trying using a gather load or scatter store.
2307 ??? Although the code can handle all group sizes correctly,
2308 it probably isn't a win to use separate strided accesses based
2309 on nearby locations. Or, even if it's a win over scalar code,
2310 it might not be a win over vectorizing at a lower VF, if that
2311 allows us to use contiguous accesses. */
2312 if (*memory_access_type
== VMAT_ELEMENTWISE
2315 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2317 *memory_access_type
= VMAT_GATHER_SCATTER
;
/* Elementwise and gather/scatter accesses touch only single components,
   so alignment of the full vector type is irrelevant for them.  */
2320 if (*memory_access_type
== VMAT_GATHER_SCATTER
2321 || *memory_access_type
== VMAT_ELEMENTWISE
)
2323 *alignment_support_scheme
= dr_unaligned_supported
;
2324 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2328 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
)
2329 *alignment_support_scheme
2330 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
/* For a grouped store, the leader validates the stored operand of every
   statement in the group via vect_is_simple_use.  */
2334 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2336 /* STMT is the leader of the group. Check the operands of all the
2337 stmts of the group. */
2338 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
)
2339 while (next_stmt_info
)
2341 tree op
= vect_get_store_rhs (next_stmt_info
)
2342 enum vect_def_type dt
;
2343 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2347 "use not simple.\n");
2350 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
)
/* An accepted overrun requires peeling for gaps; record that on the
   loop_vec_info.  */
2356 gcc_assert (can_overrun_p
)
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2359 "Data access with gaps requires scalar "
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2367 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2368 if there is a memory access type that the vectorized form can use,
2369 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2370 or scatters, fill in GS_INFO accordingly. In addition
2371 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2372 the target does not support the alignment scheme. *MISALIGNMENT
2373 is set according to the alignment of the access (including
2374 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2376 SLP says whether we're performing SLP rather than loop vectorization.
2377 MASKED_P is true if the statement is conditional on a vectorized mask.
2378 VECTYPE is the vector type that the vectorized statements will use.
2379 NCOPIES is the number of vector statements that will be needed. */
/* NOTE(review): lossy extraction — blank/brace/else lines dropped and
   logical lines split; code tokens below preserved byte-for-byte.  */
2382 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2383 tree vectype
, slp_tree slp_node
,
2384 bool masked_p
, vec_load_store_type vls_type
,
2385 unsigned int ncopies
,
2386 vect_memory_access_type
*memory_access_type
,
2387 poly_int64
*poffset
,
2388 dr_alignment_support
*alignment_support_scheme
,
2390 gather_scatter_info
*gs_info
)
2392 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
)
2393 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
)
2394 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
/* Case 1: an explicit gather/scatter access.  Validate the offset operand
   and, for the emulated case (no target ifn and no builtin decl), check
   the vector/offset subpart relationship.  */
2396 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2398 *memory_access_type
= VMAT_GATHER_SCATTER
;
2399 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2401 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2402 &gs_info
->offset_dt
,
2403 &gs_info
->offset_vectype
))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2407 "%s index use not simple.\n",
2408 vls_type
== VLS_LOAD
? "gather" : "scatter");
2411 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2413 if (vls_type
!= VLS_LOAD
)
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2417 "unsupported emulated scatter.\n");
2420 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2421 || !TYPE_VECTOR_SUBPARTS
2422 (gs_info
->offset_vectype
).is_constant ()
2423 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2424 (gs_info
->offset_vectype
),
2425 TYPE_VECTOR_SUBPARTS (vectype
)))
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2429 "unsupported vector types for emulated "
2434 /* Gather-scatter accesses perform only component accesses, alignment
2435 is irrelevant for them. */
2436 *alignment_support_scheme
= dr_unaligned_supported
;
/* Case 2: part of an interleaving group — delegate to
   get_group_load_store_type.  */
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2440 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2442 vls_type
, memory_access_type
, poffset
,
2443 alignment_support_scheme
,
2444 misalignment
, gs_info
))
/* Case 3: a strided access — prefer strided gather/scatter when the
   target supports it, else fall back to elementwise.  */
2447 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2449 gcc_assert (!slp_node
)
2451 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2453 *memory_access_type
= VMAT_GATHER_SCATTER
;
2455 *memory_access_type
= VMAT_ELEMENTWISE
;
2456 /* Alignment is irrelevant here. */
2457 *alignment_support_scheme
= dr_unaligned_supported
;
/* Case 4: a non-grouped, non-strided access — classify by the sign of
   the step: invariant, negative (reverse), or contiguous.  */
2461 int cmp
= compare_step_with_zero (vinfo
, stmt_info
)
2464 gcc_assert (vls_type
== VLS_LOAD
)
2465 *memory_access_type
= VMAT_INVARIANT
;
2466 /* Invariant accesses perform only component accesses, alignment
2467 is irrelevant for them. */
2468 *alignment_support_scheme
= dr_unaligned_supported
;
2473 *memory_access_type
= get_negative_load_store_type
2474 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
)
2476 *memory_access_type
= VMAT_CONTIGUOUS
;
2477 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2479 *alignment_support_scheme
2480 = vect_supportable_dr_alignment (vinfo
,
2481 STMT_VINFO_DR_INFO (stmt_info
),
2482 vectype
, *misalignment
)
/* Elementwise/strided-SLP accesses need a constant number of subparts;
   with a variable vectorization factor they are rejected.  */
2486 if ((*memory_access_type
== VMAT_ELEMENTWISE
2487 || *memory_access_type
== VMAT_STRIDED_SLP
)
2488 && !nunits
.is_constant ())
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2497 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2501 "unsupported unaligned access\n");
2505 /* FIXME: At the moment the cost model seems to underestimate the
2506 cost of using elementwise accesses. This check preserves the
2507 traditional behavior until that can be fixed. */
2508 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
)
2509 if (!first_stmt_info
)
2510 first_stmt_info
= stmt_info
;
2511 if (*memory_access_type
== VMAT_ELEMENTWISE
2512 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2513 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2514 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2515 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2519 "not falling back to elementwise accesses\n");
2525 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2526 conditional operation STMT_INFO. When returning true, store the mask
2527 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2528 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2529 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
2532 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2533 slp_tree slp_node
, unsigned mask_index
,
2534 tree
*mask
, slp_tree
*mask_node
,
2535 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2537 enum vect_def_type mask_dt
;
2539 slp_tree mask_node_1
;
/* The mask operand must be a "simple use" (SSA name or invariant).  */
2540 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2541 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2545 "mask use not simple.\n");
/* The scalar mask must have a boolean-compatible type.  */
2549 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2553 "mask argument is not a boolean.\n");
2557 /* If the caller is not prepared for adjusting an external/constant
2558 SLP mask vector type fail. */
2561 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2565 "SLP mask argument is not vectorized.\n");
/* Derive the vector mask type from the statement's vectype and check
   it is a vector boolean with matching subparts.  */
2569 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
)
2571 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2573 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2577 "could not find an appropriate vector mask type.\n");
2581 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2582 TYPE_VECTOR_SUBPARTS (vectype
)))
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2586 "vector mask type %T"
2587 " does not match vector data type %T.\n",
2588 mask_vectype
, vectype
)
/* All checks passed: publish the results through the out parameters.  */
2593 *mask_dt_out
= mask_dt
;
2594 *mask_vectype_out
= mask_vectype
;
2596 *mask_node
= mask_node_1
;
2600 /* Return true if stored value RHS is suitable for vectorizing store
2601 statement STMT_INFO. When returning true, store the type of the
2602 definition in *RHS_DT_OUT, the type of the vectorized store value in
2603 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
2606 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2607 slp_tree slp_node
, tree rhs
,
2608 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2609 vec_load_store_type
*vls_type_out
)
2611 /* In the case this is a store from a constant make sure
2612 native_encode_expr can handle it. */
2613 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2615 if (dump_enabled_p ())
2616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2617 "cannot encode constant as a byte sequence.\n");
/* For internal-fn store calls, the operand to validate is the stored
   value, found via internal_fn_stored_value_index.  */
2622 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2624 if (gimple_call_internal_p (call
)
2625 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2626 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2629 enum vect_def_type rhs_dt
;
2632 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2633 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2637 "use not simple.\n");
/* The stored value's vector type (if known) must be trivially
   convertible to the statement's vectype.  */
2641 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
)
2642 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2646 "incompatible vector types.\n");
/* Publish results; constant/external definitions classify the store as
   VLS_STORE_INVARIANT, anything else as VLS_STORE.  */
2650 *rhs_dt_out
= rhs_dt
;
2651 *rhs_vectype_out
= rhs_vectype
;
2652 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2653 *vls_type_out
= VLS_STORE_INVARIANT
;
2655 *vls_type_out
= VLS_STORE
;
2659 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2660 Note that we support masks with floating-point type, in which case the
2661 floats are interpreted as a bitmask. */
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
2664 vect_build_all_ones_mask (vec_info
*vinfo
,
2665 stmt_vec_info stmt_info
, tree masktype
)
/* Scalar integer mask: just the constant -1 (all bits set).  */
2667 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2668 return build_int_cst (masktype
, -1);
/* Vector of integers: splat -1 into every element and materialize it.  */
2669 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2671 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2672 mask
= build_vector_from_val (masktype
, mask
)
2673 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
)
/* Vector of floats: build the all-ones bit pattern through
   real_from_target and splat it (floats interpreted as a bitmask).  */
2675 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2679 for (int j
= 0; j
< 6; ++j
)
2681 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2682 tree mask
= build_real (TREE_TYPE (masktype
), r
)
2683 mask
= build_vector_from_val (masktype
, mask
)
2684 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
)
2689 /* Build an all-zero merge value of type VECTYPE while vectorizing
2690 STMT_INFO as a gather load. */
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
2693 vect_build_zero_merge_argument (vec_info
*vinfo
,
2694 stmt_vec_info stmt_info
, tree vectype
)
/* Integer elements: the zero constant of the element type.  */
2697 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2698 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
/* Float elements: construct the element-mode zero via real_from_target.  */
2699 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2703 for (int j
= 0; j
< 6; ++j
)
2705 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2706 merge
= build_real (TREE_TYPE (vectype
), r
)
/* Splat the scalar zero into a vector and materialize it.  */
2710 merge
= build_vector_from_val (vectype
, merge
)
2711 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
)
2714 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2715 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2716 the gather load operation. If the load is conditional, MASK is the
2717 unvectorized condition and MASK_DT is its definition type, otherwise
/* NOTE(review): lossy extraction — blank/brace/else lines dropped and
   logical lines split; code tokens below preserved byte-for-byte.  */
2721 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2722 gimple_stmt_iterator
*gsi
,
2724 gather_scatter_info
*gs_info
,
2727 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
)
2728 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
)
2729 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
)
2730 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
)
2731 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
)
2732 edge pe
= loop_preheader_edge (loop
)
2733 enum { NARROW
, NONE
, WIDEN
} modifier
;
2734 poly_uint64 gather_off_nunits
2735 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
)
/* Pull the builtin gather decl's parameter types (src, ptr, index, mask,
   scale) out of its TYPE_ARG_TYPES list.  */
2737 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
))
2738 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
))
2739 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
)
2740 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
)
2741 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
)
2742 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
)
2743 tree scaletype
= TREE_VALUE (arglist
)
2744 tree real_masktype
= masktype
;
2745 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2747 || TREE_CODE (masktype
) == INTEGER_TYPE
2748 || types_compatible_p (srctype
, masktype
)));
2750 masktype
= truth_type_for (srctype
)
/* Decide how the data vector length relates to the offset vector length:
   NONE (equal), WIDEN (offset has 2x subparts) or NARROW (data has 2x),
   and build the permutation masks each case needs.  */
2752 tree mask_halftype
= masktype
;
2753 tree perm_mask
= NULL_TREE
;
2754 tree mask_perm_mask
= NULL_TREE
;
2755 if (known_eq (nunits
, gather_off_nunits
))
2757 else if (known_eq (nunits
* 2, gather_off_nunits
))
2761 /* Currently widening gathers and scatters are only supported for
2762 fixed-length vectors. */
2763 int count
= gather_off_nunits
.to_constant ()
2764 vec_perm_builder
sel (count
, count
, 1);
2765 for (int i
= 0; i
< count
; ++i
)
2766 sel
.quick_push (i
| (count
/ 2));
2768 vec_perm_indices
indices (sel
, 1, count
)
2769 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2772 else if (known_eq (nunits
, gather_off_nunits
* 2))
2776 /* Currently narrowing gathers and scatters are only supported for
2777 fixed-length vectors. */
2778 int count
= nunits
.to_constant ()
2779 vec_perm_builder
sel (count
, count
, 1);
2780 sel
.quick_grow (count
)
2781 for (int i
= 0; i
< count
; ++i
)
2782 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2783 vec_perm_indices
indices (sel
, 2, count
)
2784 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
)
2788 if (mask
&& VECTOR_TYPE_P (real_masktype
))
2790 for (int i
= 0; i
< count
; ++i
)
2791 sel
[i
] = i
| (count
/ 2);
2792 indices
.new_vector (sel
, 2, count
)
2793 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
)
2796 mask_halftype
= truth_type_for (gs_info
->offset_vectype
)
/* Prepare the loop-invariant pieces: destination var, base pointer
   (forced to the preheader if not invariant) and the scale constant.  */
2801 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
)
2802 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
)
2804 tree ptr
= fold_convert (ptrtype
, gs_info
->base
)
2805 if (!is_gimple_min_invariant (ptr
))
2808 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
)
2809 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
)
2810 gcc_assert (!new_bb
)
2813 tree scale
= build_int_cst (scaletype
, gs_info
->scale
)
2815 tree vec_oprnd0
= NULL_TREE
;
2816 tree vec_mask
= NULL_TREE
;
2817 tree src_op
= NULL_TREE
;
2818 tree mask_op
= NULL_TREE
;
2819 tree prev_res
= NULL_TREE
;
/* Unconditional gather: use a zero merge value and an all-ones mask.  */
2823 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
)
2824 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
)
2827 auto_vec
<tree
> vec_oprnds0
;
2828 auto_vec
<tree
> vec_masks
;
2829 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2830 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2831 gs_info
->offset
, &vec_oprnds0
)
2833 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2834 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2835 mask
, &vec_masks
, masktype
)
/* Main loop: emit one gather builtin call per copy, permuting or
   view-converting the offset and mask operands as required.  */
2836 for (int j
= 0; j
< ncopies
; ++j
)
2839 if (modifier
== WIDEN
&& (j
& 1))
2840 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2841 perm_mask
, stmt_info
, gsi
)
2843 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2845 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2848 TYPE_VECTOR_SUBPARTS (idxtype
)));
2849 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
)
2850 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
)
2851 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
)
2852 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
2858 if (mask_perm_mask
&& (j
& 1))
2859 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2860 mask_perm_mask
, stmt_info
, gsi
)
2863 if (modifier
== NARROW
)
2866 vec_mask
= vec_masks
[j
/ 2];
2869 vec_mask
= vec_masks
[j
];
2872 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2874 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2875 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
)
2876 gcc_assert (known_eq (sub1
, sub2
));
2877 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
)
2878 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
)
2880 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
)
2881 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
/* Narrowing with a non-vector builtin mask type: unpack the half of
   the mask that feeds this copy.  */
2885 if (modifier
== NARROW
&& !VECTOR_TYPE_P (real_masktype
))
2887 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
)
2889 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2890 : VEC_UNPACK_LO_EXPR
,
2892 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
/* Convert the boolean mask to the builtin's expected mask type,
   going through a same-mode integer type when necessary.  */
2898 tree mask_arg
= mask_op
;
2899 if (masktype
!= real_masktype
)
2901 tree utype
, optype
= TREE_TYPE (mask_op
)
2902 if (VECTOR_TYPE_P (real_masktype
)
2903 || TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2904 utype
= real_masktype
;
2906 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2907 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
)
2908 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
)
2910 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
)
2911 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
2913 if (!useless_type_conversion_p (real_masktype
, utype
))
2915 gcc_assert (TYPE_PRECISION (utype
)
2916 <= TYPE_PRECISION (real_masktype
));
2917 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
)
2918 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
)
2919 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
2922 src_op
= build_zero_cst (srctype
)
/* Emit the actual 5-argument gather call: (src, ptr, index, mask, scale).  */
2924 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2927 if (!useless_type_conversion_p (vectype
, rettype
))
2929 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2930 TYPE_VECTOR_SUBPARTS (rettype
)));
2931 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
)
2932 gimple_call_set_lhs (new_stmt
, op
)
2933 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
2934 var
= make_ssa_name (vec_dest
)
2935 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
)
2936 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
)
2937 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
2941 var
= make_ssa_name (vec_dest
, new_stmt
)
2942 gimple_call_set_lhs (new_stmt
, var
)
2943 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
)
/* Narrowing: combine two half-width results into one full vector via
   the permutation built above.  */
2946 if (modifier
== NARROW
)
2953 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2955 new_stmt
= SSA_NAME_DEF_STMT (var
)
2958 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
)
2960 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2963 /* Prepare the base and offset in GS_INFO for vectorization.
2964 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2965 to the vectorized offset argument for the first copy of STMT_INFO.
2966 STMT_INFO is the statement described by GS_INFO and LOOP is the
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
2970 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2971 class loop
*loop
, stmt_vec_info stmt_info
,
2972 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2973 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
/* Gimplify the base address; any statements produced are inserted on the
   loop preheader edge so the base is loop-invariant.  */
2975 gimple_seq stmts
= NULL
;
2976 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
)
2980 edge pe
= loop_preheader_edge (loop
)
2981 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
)
2982 gcc_assert (!new_bb
)
/* SLP: the vectorized offsets come from the first SLP child.  */
2985 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
)
/* Non-SLP: create NCOPIES vector definitions for the offset operand.  */
2989 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
)
2990 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2991 gs_info
->offset
, vec_offset
,
2992 gs_info
->offset_vectype
)
2996 /* Prepare to implement a grouped or strided load or store using
2997 the gather load or scatter store operation described by GS_INFO.
2998 STMT_INFO is the load or store statement.
3000 Set *DATAREF_BUMP to the amount that should be added to the base
3001 address after each copy of the vectorized statement. Set *VEC_OFFSET
3002 to an invariant offset vector in which element I has the value
3003 I * DR_STEP / SCALE. */
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
3006 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
3007 loop_vec_info loop_vinfo
,
3008 gather_scatter_info
*gs_info
,
3009 tree
*dataref_bump
, tree
*vec_offset
)
3011 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
)
3012 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
)
/* Bump = DR_STEP * number-of-subparts, CSEd into the preheader.  */
3014 tree bump
= size_binop (MULT_EXPR
,
3015 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
3016 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3017 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
)
3019 /* The offset given in GS_INFO can have pointer type, so use the element
3020 type of the vector instead. */
3021 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
)
3023 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3024 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3025 ssize_int (gs_info
->scale
));
3026 step
= fold_convert (offset_type
, step
)
3028 /* Create {0, X, X*2, X*3, ...}. */
3029 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3030 build_zero_cst (offset_type
), step
)
3031 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
)
3034 /* Return the amount that should be added to a vector pointer to move
3035 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3036 being vectorized and MEMORY_ACCESS_TYPE describes the type of
/* NOTE(review): lossy extraction — blank/brace lines dropped and logical
   lines split; code tokens below preserved byte-for-byte.  */
3040 vect_get_data_ptr_increment (vec_info
*vinfo
,
3041 dr_vec_info
*dr_info
, tree aggr_type
,
3042 vect_memory_access_type memory_access_type
)
/* Invariant accesses never advance the pointer.  */
3044 if (memory_access_type
== VMAT_INVARIANT
)
3045 return size_zero_node
;
/* Otherwise step by the size of AGGR_TYPE, negated when the data
   reference's step is negative (backwards access).  */
3047 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
)
3048 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3049 if (tree_int_cst_sgn (step
) == -1)
3050 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
)
3054 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3057 vectorizable_bswap (vec_info
*vinfo
,
3058 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3059 gimple
**vec_stmt
, slp_tree slp_node
,
3061 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3064 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3065 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3068 op
= gimple_call_arg (stmt
, 0);
3069 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3070 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3072 /* Multiple types in SLP are handled by creating the appropriate number of
3073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3078 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3080 gcc_assert (ncopies
>= 1);
3082 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3086 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3087 unsigned word_bytes
;
3088 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3091 /* The encoding uses one stepped pattern for each byte in the word. */
3092 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3093 for (unsigned i
= 0; i
< 3; ++i
)
3094 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3095 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3097 vec_perm_indices
indices (elts
, 1, num_bytes
);
3098 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3104 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3106 if (dump_enabled_p ())
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3108 "incompatible vector types for invariants\n");
3112 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3113 DUMP_VECT_SCOPE ("vectorizable_bswap");
3114 record_stmt_cost (cost_vec
,
3115 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3116 record_stmt_cost (cost_vec
,
3118 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3119 vec_perm
, stmt_info
, 0, vect_body
);
3123 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3126 vec
<tree
> vec_oprnds
= vNULL
;
3127 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3129 /* Arguments are ready. create the new vector stmt. */
3132 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3135 tree tem
= make_ssa_name (char_vectype
);
3136 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3137 char_vectype
, vop
));
3138 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3139 tree tem2
= make_ssa_name (char_vectype
);
3140 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3141 tem
, tem
, bswap_vconst
);
3142 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3143 tem
= make_ssa_name (vectype
);
3144 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3146 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3148 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3150 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3154 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3156 vec_oprnds
.release ();
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162 in a single step. On success, store the binary pack code in
3166 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3167 tree_code
*convert_code
)
3169 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3170 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3174 int multi_step_cvt
= 0;
3175 auto_vec
<tree
, 8> interm_types
;
3176 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3177 &code
, &multi_step_cvt
, &interm_types
)
3181 *convert_code
= code
;
3185 /* Function vectorizable_call.
3187 Check if STMT_INFO performs a function call that can be vectorized.
3188 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3190 Return true if STMT_INFO is vectorizable in this way. */
3193 vectorizable_call (vec_info
*vinfo
,
3194 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3195 gimple
**vec_stmt
, slp_tree slp_node
,
3196 stmt_vector_for_cost
*cost_vec
)
3202 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3203 tree vectype_out
, vectype_in
;
3204 poly_uint64 nunits_in
;
3205 poly_uint64 nunits_out
;
3206 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3207 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3208 tree fndecl
, new_temp
, rhs_type
;
3209 enum vect_def_type dt
[4]
3210 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3211 vect_unknown_def_type
};
3212 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3213 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3214 int ndts
= ARRAY_SIZE (dt
);
3216 auto_vec
<tree
, 8> vargs
;
3217 enum { NARROW
, NONE
, WIDEN
} modifier
;
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3224 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3228 /* Is STMT_INFO a vectorizable call? */
3229 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3233 if (gimple_call_internal_p (stmt
)
3234 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3235 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3236 /* Handled by vectorizable_load and vectorizable_store. */
3239 if (gimple_call_lhs (stmt
) == NULL_TREE
3240 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3243 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3245 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3247 /* Process function arguments. */
3248 rhs_type
= NULL_TREE
;
3249 vectype_in
= NULL_TREE
;
3250 nargs
= gimple_call_num_args (stmt
);
3252 /* Bail out if the function has more than four arguments, we do not have
3253 interesting builtin functions to vectorize with more than two arguments
3254 except for fma. No arguments is also not good. */
3255 if (nargs
== 0 || nargs
> 4)
3258 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3259 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3260 if (cfn
== CFN_GOMP_SIMD_LANE
)
3263 rhs_type
= unsigned_type_node
;
3267 if (internal_fn_p (cfn
))
3268 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3270 for (i
= 0; i
< nargs
; i
++)
3272 if ((int) i
== mask_opno
)
3274 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3275 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3280 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3281 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3285 "use not simple.\n");
3289 /* We can only handle calls with arguments of the same type. */
3291 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3295 "argument types differ.\n");
3299 rhs_type
= TREE_TYPE (op
);
3302 vectype_in
= vectypes
[i
];
3303 else if (vectypes
[i
]
3304 && !types_compatible_p (vectypes
[i
], vectype_in
))
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3308 "argument vector types differ.\n");
3312 /* If all arguments are external or constant defs, infer the vector type
3313 from the scalar type. */
3315 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3317 gcc_assert (vectype_in
);
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3322 "no vectype for scalar type %T\n", rhs_type
);
3326 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3327 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3328 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3329 by a pack of the two vectors into an SI vector. We would need
3330 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3331 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3335 "mismatched vector sizes %T and %T\n",
3336 vectype_in
, vectype_out
);
3340 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3341 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3343 if (dump_enabled_p ())
3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3345 "mixed mask and nonmask vector types\n");
3350 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3351 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3352 if (known_eq (nunits_in
* 2, nunits_out
))
3354 else if (known_eq (nunits_out
, nunits_in
))
3356 else if (known_eq (nunits_out
* 2, nunits_in
))
3361 /* We only handle functions that do not read or clobber memory. */
3362 if (gimple_vuse (stmt
))
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3366 "function reads from or writes to memory.\n");
3370 /* For now, we only vectorize functions if a target specific builtin
3371 is available. TODO -- in some cases, it might be profitable to
3372 insert the calls for pieces of the vector, in order to be able
3373 to vectorize other operations in the loop. */
3375 internal_fn ifn
= IFN_LAST
;
3376 tree callee
= gimple_call_fndecl (stmt
);
3378 /* First try using an internal function. */
3379 tree_code convert_code
= ERROR_MARK
;
3381 && (modifier
== NONE
3382 || (modifier
== NARROW
3383 && simple_integer_narrowing (vectype_out
, vectype_in
,
3385 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3388 /* If that fails, try asking for a target-specific built-in function. */
3389 if (ifn
== IFN_LAST
)
3391 if (cfn
!= CFN_LAST
)
3392 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3393 (cfn
, vectype_out
, vectype_in
);
3394 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3395 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3396 (callee
, vectype_out
, vectype_in
);
3399 if (ifn
== IFN_LAST
&& !fndecl
)
3401 if (cfn
== CFN_GOMP_SIMD_LANE
3404 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3405 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3406 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3407 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3409 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3410 { 0, 1, 2, ... vf - 1 } vector. */
3411 gcc_assert (nargs
== 0);
3413 else if (modifier
== NONE
3414 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3415 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3416 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3417 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3418 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3419 slp_op
, vectype_in
, cost_vec
);
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3424 "function is not vectorizable.\n");
3431 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3432 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3434 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3436 /* Sanity check: make sure that at least one copy of the vectorized stmt
3437 needs to be generated. */
3438 gcc_assert (ncopies
>= 1);
3440 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3441 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3442 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3443 if (!vec_stmt
) /* transformation not required. */
3446 for (i
= 0; i
< nargs
; ++i
)
3447 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3451 "incompatible vector types for invariants\n");
3454 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3455 DUMP_VECT_SCOPE ("vectorizable_call");
3456 vect_model_simple_cost (vinfo
, stmt_info
,
3457 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3458 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3459 record_stmt_cost (cost_vec
, ncopies
/ 2,
3460 vec_promote_demote
, stmt_info
, 0, vect_body
);
3463 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3464 && (reduc_idx
>= 0 || mask_opno
>= 0))
3467 && (cond_fn
== IFN_LAST
3468 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3469 OPTIMIZE_FOR_SPEED
)))
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3473 "can't use a fully-masked loop because no"
3474 " conditional operation is available.\n");
3475 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3479 unsigned int nvectors
3481 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3483 tree scalar_mask
= NULL_TREE
;
3485 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3486 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3487 vectype_out
, scalar_mask
);
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3499 scalar_dest
= gimple_call_lhs (stmt
);
3500 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3502 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3503 unsigned int vect_nargs
= nargs
;
3504 if (masked_loop_p
&& reduc_idx
>= 0)
3510 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3512 tree prev_res
= NULL_TREE
;
3513 vargs
.safe_grow (vect_nargs
, true);
3514 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3515 for (j
= 0; j
< ncopies
; ++j
)
3517 /* Build argument list for the vectorized call. */
3520 vec
<tree
> vec_oprnds0
;
3522 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3523 vec_oprnds0
= vec_defs
[0];
3525 /* Arguments are ready. Create the new vector stmt. */
3526 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3529 if (masked_loop_p
&& reduc_idx
>= 0)
3531 unsigned int vec_num
= vec_oprnds0
.length ();
3532 /* Always true for SLP. */
3533 gcc_assert (ncopies
== 1);
3534 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, vec_num
,
3538 for (k
= 0; k
< nargs
; k
++)
3540 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3541 vargs
[varg
++] = vec_oprndsk
[i
];
3543 if (masked_loop_p
&& reduc_idx
>= 0)
3544 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3546 if (modifier
== NARROW
)
3548 /* We don't define any narrowing conditional functions
3550 gcc_assert (mask_opno
< 0);
3551 tree half_res
= make_ssa_name (vectype_in
);
3553 = gimple_build_call_internal_vec (ifn
, vargs
);
3554 gimple_call_set_lhs (call
, half_res
);
3555 gimple_call_set_nothrow (call
, true);
3556 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3559 prev_res
= half_res
;
3562 new_temp
= make_ssa_name (vec_dest
);
3563 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3564 prev_res
, half_res
);
3565 vect_finish_stmt_generation (vinfo
, stmt_info
,
3570 if (mask_opno
>= 0 && masked_loop_p
)
3572 unsigned int vec_num
= vec_oprnds0
.length ();
3573 /* Always true for SLP. */
3574 gcc_assert (ncopies
== 1);
3575 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3577 vargs
[mask_opno
] = prepare_vec_mask
3578 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3579 vargs
[mask_opno
], gsi
);
3583 if (ifn
!= IFN_LAST
)
3584 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3586 call
= gimple_build_call_vec (fndecl
, vargs
);
3587 new_temp
= make_ssa_name (vec_dest
, call
);
3588 gimple_call_set_lhs (call
, new_temp
);
3589 gimple_call_set_nothrow (call
, true);
3590 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3593 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3599 if (masked_loop_p
&& reduc_idx
>= 0)
3600 vargs
[varg
++] = vect_get_loop_mask (gsi
, masks
, ncopies
,
3602 for (i
= 0; i
< nargs
; i
++)
3604 op
= gimple_call_arg (stmt
, i
);
3607 vec_defs
.quick_push (vNULL
);
3608 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3612 vargs
[varg
++] = vec_defs
[i
][j
];
3614 if (masked_loop_p
&& reduc_idx
>= 0)
3615 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3617 if (mask_opno
>= 0 && masked_loop_p
)
3619 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3622 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3623 vargs
[mask_opno
], gsi
);
3627 if (cfn
== CFN_GOMP_SIMD_LANE
)
3629 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3631 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3632 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3633 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3634 new_temp
= make_ssa_name (vec_dest
);
3635 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3636 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3638 else if (modifier
== NARROW
)
3640 /* We don't define any narrowing conditional functions at
3642 gcc_assert (mask_opno
< 0);
3643 tree half_res
= make_ssa_name (vectype_in
);
3644 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3645 gimple_call_set_lhs (call
, half_res
);
3646 gimple_call_set_nothrow (call
, true);
3647 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3650 prev_res
= half_res
;
3653 new_temp
= make_ssa_name (vec_dest
);
3654 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3655 prev_res
, half_res
);
3656 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3661 if (ifn
!= IFN_LAST
)
3662 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3664 call
= gimple_build_call_vec (fndecl
, vargs
);
3665 new_temp
= make_ssa_name (vec_dest
, call
);
3666 gimple_call_set_lhs (call
, new_temp
);
3667 gimple_call_set_nothrow (call
, true);
3668 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3672 if (j
== (modifier
== NARROW
? 1 : 0))
3673 *vec_stmt
= new_stmt
;
3674 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3676 for (i
= 0; i
< nargs
; i
++)
3678 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3679 vec_oprndsi
.release ();
3682 else if (modifier
== NARROW
)
3684 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3685 /* We don't define any narrowing conditional functions at present. */
3686 gcc_assert (mask_opno
< 0);
3687 for (j
= 0; j
< ncopies
; ++j
)
3689 /* Build argument list for the vectorized call. */
3691 vargs
.create (nargs
* 2);
3697 vec
<tree
> vec_oprnds0
;
3699 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3700 vec_oprnds0
= vec_defs
[0];
3702 /* Arguments are ready. Create the new vector stmt. */
3703 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3707 for (k
= 0; k
< nargs
; k
++)
3709 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3710 vargs
.quick_push (vec_oprndsk
[i
]);
3711 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3714 if (ifn
!= IFN_LAST
)
3715 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3717 call
= gimple_build_call_vec (fndecl
, vargs
);
3718 new_temp
= make_ssa_name (vec_dest
, call
);
3719 gimple_call_set_lhs (call
, new_temp
);
3720 gimple_call_set_nothrow (call
, true);
3721 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3722 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3727 for (i
= 0; i
< nargs
; i
++)
3729 op
= gimple_call_arg (stmt
, i
);
3732 vec_defs
.quick_push (vNULL
);
3733 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3734 op
, &vec_defs
[i
], vectypes
[i
]);
3736 vec_oprnd0
= vec_defs
[i
][2*j
];
3737 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3739 vargs
.quick_push (vec_oprnd0
);
3740 vargs
.quick_push (vec_oprnd1
);
3743 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3744 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3745 gimple_call_set_lhs (new_stmt
, new_temp
);
3746 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3748 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3752 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3754 for (i
= 0; i
< nargs
; i
++)
3756 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3757 vec_oprndsi
.release ();
3761 /* No current target implements this case. */
3766 /* The call in STMT might prevent it from being removed in dce.
3767 We however cannot remove it here, due to the way the ssa name
3768 it defines is mapped to the new definition. So just replace
3769 rhs of the statement with something harmless. */
3774 stmt_info
= vect_orig_stmt (stmt_info
);
3775 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3778 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3779 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3785 struct simd_call_arg_info
3789 HOST_WIDE_INT linear_step
;
3790 enum vect_def_type dt
;
3792 bool simd_lane_linear
;
3795 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3796 is linear within simd lane (but not within whole loop), note it in
3800 vect_simd_lane_linear (tree op
, class loop
*loop
,
3801 struct simd_call_arg_info
*arginfo
)
3803 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3805 if (!is_gimple_assign (def_stmt
)
3806 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3807 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3810 tree base
= gimple_assign_rhs1 (def_stmt
);
3811 HOST_WIDE_INT linear_step
= 0;
3812 tree v
= gimple_assign_rhs2 (def_stmt
);
3813 while (TREE_CODE (v
) == SSA_NAME
)
3816 def_stmt
= SSA_NAME_DEF_STMT (v
);
3817 if (is_gimple_assign (def_stmt
))
3818 switch (gimple_assign_rhs_code (def_stmt
))
3821 t
= gimple_assign_rhs2 (def_stmt
);
3822 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3824 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3825 v
= gimple_assign_rhs1 (def_stmt
);
3828 t
= gimple_assign_rhs2 (def_stmt
);
3829 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3831 linear_step
= tree_to_shwi (t
);
3832 v
= gimple_assign_rhs1 (def_stmt
);
3835 t
= gimple_assign_rhs1 (def_stmt
);
3836 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3837 || (TYPE_PRECISION (TREE_TYPE (v
))
3838 < TYPE_PRECISION (TREE_TYPE (t
))))
3847 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3849 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3850 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3855 arginfo
->linear_step
= linear_step
;
3857 arginfo
->simd_lane_linear
= true;
3863 /* Return the number of elements in vector type VECTYPE, which is associated
3864 with a SIMD clone. At present these vectors always have a constant
3867 static unsigned HOST_WIDE_INT
3868 simd_clone_subparts (tree vectype
)
3870 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3873 /* Function vectorizable_simd_clone_call.
3875 Check if STMT_INFO performs a function call that can be vectorized
3876 by calling a simd clone of the function.
3877 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3878 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3879 Return true if STMT_INFO is vectorizable in this way. */
3882 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3883 gimple_stmt_iterator
*gsi
,
3884 gimple
**vec_stmt
, slp_tree slp_node
,
3885 stmt_vector_for_cost
*)
3890 tree vec_oprnd0
= NULL_TREE
;
3893 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3894 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3895 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3896 tree fndecl
, new_temp
;
3898 auto_vec
<simd_call_arg_info
> arginfo
;
3899 vec
<tree
> vargs
= vNULL
;
3901 tree lhs
, rtype
, ratype
;
3902 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3904 /* Is STMT a vectorizable call? */
3905 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3909 fndecl
= gimple_call_fndecl (stmt
);
3910 if (fndecl
== NULL_TREE
)
3913 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3914 if (node
== NULL
|| node
->simd_clones
== NULL
)
3917 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3920 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3924 if (gimple_call_lhs (stmt
)
3925 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3928 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3930 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3932 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3939 /* Process function arguments. */
3940 nargs
= gimple_call_num_args (stmt
);
3942 /* Bail out if the function has zero arguments. */
3946 arginfo
.reserve (nargs
, true);
3948 for (i
= 0; i
< nargs
; i
++)
3950 simd_call_arg_info thisarginfo
;
3953 thisarginfo
.linear_step
= 0;
3954 thisarginfo
.align
= 0;
3955 thisarginfo
.op
= NULL_TREE
;
3956 thisarginfo
.simd_lane_linear
= false;
3958 op
= gimple_call_arg (stmt
, i
);
3959 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3960 &thisarginfo
.vectype
)
3961 || thisarginfo
.dt
== vect_uninitialized_def
)
3963 if (dump_enabled_p ())
3964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3965 "use not simple.\n");
3969 if (thisarginfo
.dt
== vect_constant_def
3970 || thisarginfo
.dt
== vect_external_def
)
3971 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3974 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3975 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3979 "vector mask arguments are not supported\n");
3984 /* For linear arguments, the analyze phase should have saved
3985 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3986 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3987 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3989 gcc_assert (vec_stmt
);
3990 thisarginfo
.linear_step
3991 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3993 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3994 thisarginfo
.simd_lane_linear
3995 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3996 == boolean_true_node
);
3997 /* If loop has been peeled for alignment, we need to adjust it. */
3998 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3999 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4000 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4002 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4003 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
4004 tree opt
= TREE_TYPE (thisarginfo
.op
);
4005 bias
= fold_convert (TREE_TYPE (step
), bias
);
4006 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4008 = fold_build2 (POINTER_TYPE_P (opt
)
4009 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4010 thisarginfo
.op
, bias
);
4014 && thisarginfo
.dt
!= vect_constant_def
4015 && thisarginfo
.dt
!= vect_external_def
4017 && TREE_CODE (op
) == SSA_NAME
4018 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4020 && tree_fits_shwi_p (iv
.step
))
4022 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4023 thisarginfo
.op
= iv
.base
;
4025 else if ((thisarginfo
.dt
== vect_constant_def
4026 || thisarginfo
.dt
== vect_external_def
)
4027 && POINTER_TYPE_P (TREE_TYPE (op
)))
4028 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4029 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4031 if (POINTER_TYPE_P (TREE_TYPE (op
))
4032 && !thisarginfo
.linear_step
4034 && thisarginfo
.dt
!= vect_constant_def
4035 && thisarginfo
.dt
!= vect_external_def
4038 && TREE_CODE (op
) == SSA_NAME
)
4039 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4041 arginfo
.quick_push (thisarginfo
);
4044 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4045 if (!vf
.is_constant ())
4047 if (dump_enabled_p ())
4048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4049 "not considering SIMD clones; not yet supported"
4050 " for variable-width vectors.\n");
4054 unsigned int badness
= 0;
4055 struct cgraph_node
*bestn
= NULL
;
4056 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4057 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4059 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4060 n
= n
->simdclone
->next_clone
)
4062 unsigned int this_badness
= 0;
4063 unsigned int num_calls
;
4064 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4065 || n
->simdclone
->nargs
!= nargs
)
4068 this_badness
+= exact_log2 (num_calls
) * 4096;
4069 if (n
->simdclone
->inbranch
)
4070 this_badness
+= 8192;
4071 int target_badness
= targetm
.simd_clone
.usable (n
);
4072 if (target_badness
< 0)
4074 this_badness
+= target_badness
* 512;
4075 /* FORNOW: Have to add code to add the mask argument. */
4076 if (n
->simdclone
->inbranch
)
4078 for (i
= 0; i
< nargs
; i
++)
4080 switch (n
->simdclone
->args
[i
].arg_type
)
4082 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4083 if (!useless_type_conversion_p
4084 (n
->simdclone
->args
[i
].orig_type
,
4085 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4087 else if (arginfo
[i
].dt
== vect_constant_def
4088 || arginfo
[i
].dt
== vect_external_def
4089 || arginfo
[i
].linear_step
)
4092 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4093 if (arginfo
[i
].dt
!= vect_constant_def
4094 && arginfo
[i
].dt
!= vect_external_def
)
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4099 if (arginfo
[i
].dt
== vect_constant_def
4100 || arginfo
[i
].dt
== vect_external_def
4101 || (arginfo
[i
].linear_step
4102 != n
->simdclone
->args
[i
].linear_step
))
4105 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4106 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4107 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4108 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4114 case SIMD_CLONE_ARG_TYPE_MASK
:
4117 if (i
== (size_t) -1)
4119 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4124 if (arginfo
[i
].align
)
4125 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4126 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4128 if (i
== (size_t) -1)
4130 if (bestn
== NULL
|| this_badness
< badness
)
4133 badness
= this_badness
;
4140 for (i
= 0; i
< nargs
; i
++)
4141 if ((arginfo
[i
].dt
== vect_constant_def
4142 || arginfo
[i
].dt
== vect_external_def
)
4143 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4145 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4146 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4148 if (arginfo
[i
].vectype
== NULL
4149 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4150 simd_clone_subparts (arginfo
[i
].vectype
)))
4154 fndecl
= bestn
->decl
;
4155 nunits
= bestn
->simdclone
->simdlen
;
4156 ncopies
= vector_unroll_factor (vf
, nunits
);
4158 /* If the function isn't const, only allow it in simd loops where user
4159 has asserted that at least nunits consecutive iterations can be
4160 performed using SIMD instructions. */
4161 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4162 && gimple_vuse (stmt
))
4165 /* Sanity check: make sure that at least one copy of the vectorized stmt
4166 needs to be generated. */
4167 gcc_assert (ncopies
>= 1);
4169 if (!vec_stmt
) /* transformation not required. */
4171 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4172 for (i
= 0; i
< nargs
; i
++)
4173 if ((bestn
->simdclone
->args
[i
].arg_type
4174 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4175 || (bestn
->simdclone
->args
[i
].arg_type
4176 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4178 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4181 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4182 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4183 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4184 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4185 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4186 tree sll
= arginfo
[i
].simd_lane_linear
4187 ? boolean_true_node
: boolean_false_node
;
4188 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4190 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4191 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4192 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4193 dt, slp_node, cost_vec); */
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4203 scalar_dest
= gimple_call_lhs (stmt
);
4204 vec_dest
= NULL_TREE
;
4209 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4210 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4211 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4214 rtype
= TREE_TYPE (ratype
);
4218 auto_vec
<vec
<tree
> > vec_oprnds
;
4219 auto_vec
<unsigned> vec_oprnds_i
;
4220 vec_oprnds
.safe_grow_cleared (nargs
, true);
4221 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4222 for (j
= 0; j
< ncopies
; ++j
)
4224 /* Build argument list for the vectorized call. */
4226 vargs
.create (nargs
);
4230 for (i
= 0; i
< nargs
; i
++)
4232 unsigned int k
, l
, m
, o
;
4234 op
= gimple_call_arg (stmt
, i
);
4235 switch (bestn
->simdclone
->args
[i
].arg_type
)
4237 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4238 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4239 o
= vector_unroll_factor (nunits
,
4240 simd_clone_subparts (atype
));
4241 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4243 if (simd_clone_subparts (atype
)
4244 < simd_clone_subparts (arginfo
[i
].vectype
))
4246 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4247 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4248 / simd_clone_subparts (atype
));
4249 gcc_assert ((k
& (k
- 1)) == 0);
4252 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4253 ncopies
* o
/ k
, op
,
4255 vec_oprnds_i
[i
] = 0;
4256 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4260 vec_oprnd0
= arginfo
[i
].op
;
4261 if ((m
& (k
- 1)) == 0)
4262 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4264 arginfo
[i
].op
= vec_oprnd0
;
4266 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4268 bitsize_int ((m
& (k
- 1)) * prec
));
4270 = gimple_build_assign (make_ssa_name (atype
),
4272 vect_finish_stmt_generation (vinfo
, stmt_info
,
4274 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4278 k
= (simd_clone_subparts (atype
)
4279 / simd_clone_subparts (arginfo
[i
].vectype
));
4280 gcc_assert ((k
& (k
- 1)) == 0);
4281 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4283 vec_alloc (ctor_elts
, k
);
4286 for (l
= 0; l
< k
; l
++)
4288 if (m
== 0 && l
== 0)
4290 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4294 vec_oprnds_i
[i
] = 0;
4295 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4298 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4299 arginfo
[i
].op
= vec_oprnd0
;
4302 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4306 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4310 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4312 = gimple_build_assign (make_ssa_name (atype
),
4314 vect_finish_stmt_generation (vinfo
, stmt_info
,
4316 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4319 vargs
.safe_push (vec_oprnd0
);
4322 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4324 = gimple_build_assign (make_ssa_name (atype
),
4326 vect_finish_stmt_generation (vinfo
, stmt_info
,
4328 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4333 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4334 vargs
.safe_push (op
);
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4342 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4343 &stmts
, true, NULL_TREE
);
4347 edge pe
= loop_preheader_edge (loop
);
4348 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4349 gcc_assert (!new_bb
);
4351 if (arginfo
[i
].simd_lane_linear
)
4353 vargs
.safe_push (arginfo
[i
].op
);
4356 tree phi_res
= copy_ssa_name (op
);
4357 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4358 add_phi_arg (new_phi
, arginfo
[i
].op
,
4359 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4361 = POINTER_TYPE_P (TREE_TYPE (op
))
4362 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4363 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4364 ? sizetype
: TREE_TYPE (op
);
4366 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4368 tree tcst
= wide_int_to_tree (type
, cst
);
4369 tree phi_arg
= copy_ssa_name (op
);
4371 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4372 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4373 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4374 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4376 arginfo
[i
].op
= phi_res
;
4377 vargs
.safe_push (phi_res
);
4382 = POINTER_TYPE_P (TREE_TYPE (op
))
4383 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4384 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4385 ? sizetype
: TREE_TYPE (op
);
4387 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4389 tree tcst
= wide_int_to_tree (type
, cst
);
4390 new_temp
= make_ssa_name (TREE_TYPE (op
));
4392 = gimple_build_assign (new_temp
, code
,
4393 arginfo
[i
].op
, tcst
);
4394 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4395 vargs
.safe_push (new_temp
);
4398 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4399 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4400 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4401 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4402 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4403 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4409 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4413 || known_eq (simd_clone_subparts (rtype
), nunits
));
4415 new_temp
= create_tmp_var (ratype
);
4416 else if (useless_type_conversion_p (vectype
, rtype
))
4417 new_temp
= make_ssa_name (vec_dest
, new_call
);
4419 new_temp
= make_ssa_name (rtype
, new_call
);
4420 gimple_call_set_lhs (new_call
, new_temp
);
4422 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4423 gimple
*new_stmt
= new_call
;
4427 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4430 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4431 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4432 k
= vector_unroll_factor (nunits
,
4433 simd_clone_subparts (vectype
));
4434 gcc_assert ((k
& (k
- 1)) == 0);
4435 for (l
= 0; l
< k
; l
++)
4440 t
= build_fold_addr_expr (new_temp
);
4441 t
= build2 (MEM_REF
, vectype
, t
,
4442 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4445 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4446 bitsize_int (prec
), bitsize_int (l
* prec
));
4447 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4448 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4450 if (j
== 0 && l
== 0)
4451 *vec_stmt
= new_stmt
;
4452 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4456 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4459 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4461 unsigned int k
= (simd_clone_subparts (vectype
)
4462 / simd_clone_subparts (rtype
));
4463 gcc_assert ((k
& (k
- 1)) == 0);
4464 if ((j
& (k
- 1)) == 0)
4465 vec_alloc (ret_ctor_elts
, k
);
4469 o
= vector_unroll_factor (nunits
,
4470 simd_clone_subparts (rtype
));
4471 for (m
= 0; m
< o
; m
++)
4473 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4474 size_int (m
), NULL_TREE
, NULL_TREE
);
4475 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4477 vect_finish_stmt_generation (vinfo
, stmt_info
,
4479 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4480 gimple_assign_lhs (new_stmt
));
4482 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4485 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4486 if ((j
& (k
- 1)) != k
- 1)
4488 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4490 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4491 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4493 if ((unsigned) j
== k
- 1)
4494 *vec_stmt
= new_stmt
;
4495 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4500 tree t
= build_fold_addr_expr (new_temp
);
4501 t
= build2 (MEM_REF
, vectype
, t
,
4502 build_int_cst (TREE_TYPE (t
), 0));
4503 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4504 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4505 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4507 else if (!useless_type_conversion_p (vectype
, rtype
))
4509 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4511 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4512 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4517 *vec_stmt
= new_stmt
;
4518 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4521 for (i
= 0; i
< nargs
; ++i
)
4523 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4528 /* The call in STMT might prevent it from being removed in dce.
4529 We however cannot remove it here, due to the way the ssa name
4530 it defines is mapped to the new definition. So just replace
4531 rhs of the statement with something harmless. */
4539 type
= TREE_TYPE (scalar_dest
);
4540 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4541 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4544 new_stmt
= gimple_build_nop ();
4545 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4546 unlink_stmt_vdef (stmt
);
4552 /* Function vect_gen_widened_results_half
4554 Create a vector stmt whose code, type, number of arguments, and result
4555 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4556 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4557 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4558 needs to be created (DECL is a function-decl of a target-builtin).
4559 STMT_INFO is the original scalar stmt that we are vectorizing. */
4562 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4563 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4564 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4565 stmt_vec_info stmt_info
)
4570 /* Generate half of the widened result: */
4571 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4572 if (op_type
!= binary_op
)
4574 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4575 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4576 gimple_assign_set_lhs (new_stmt
, new_temp
);
4577 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4583 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4584 For multi-step conversions store the resulting vectors and call the function
4588 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4590 stmt_vec_info stmt_info
,
4591 vec
<tree
> &vec_dsts
,
4592 gimple_stmt_iterator
*gsi
,
4593 slp_tree slp_node
, enum tree_code code
)
4596 tree vop0
, vop1
, new_tmp
, vec_dest
;
4598 vec_dest
= vec_dsts
.pop ();
4600 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4602 /* Create demotion operation. */
4603 vop0
= (*vec_oprnds
)[i
];
4604 vop1
= (*vec_oprnds
)[i
+ 1];
4605 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4606 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4607 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4608 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4611 /* Store the resulting vector for next recursive call. */
4612 (*vec_oprnds
)[i
/2] = new_tmp
;
4615 /* This is the last step of the conversion sequence. Store the
4616 vectors in SLP_NODE or in vector info of the scalar statement
4617 (or in STMT_VINFO_RELATED_STMT chain). */
4619 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4621 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4625 /* For multi-step demotion operations we first generate demotion operations
4626 from the source type to the intermediate types, and then combine the
4627 results (stored in VEC_OPRNDS) in demotion operation to the destination
4631 /* At each level of recursion we have half of the operands we had at the
4633 vec_oprnds
->truncate ((i
+1)/2);
4634 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4636 stmt_info
, vec_dsts
, gsi
,
4637 slp_node
, VEC_PACK_TRUNC_EXPR
);
4640 vec_dsts
.quick_push (vec_dest
);
4644 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4645 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4646 STMT_INFO. For multi-step conversions store the resulting vectors and
4647 call the function recursively. */
4650 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4651 vec
<tree
> *vec_oprnds0
,
4652 vec
<tree
> *vec_oprnds1
,
4653 stmt_vec_info stmt_info
, tree vec_dest
,
4654 gimple_stmt_iterator
*gsi
,
4655 enum tree_code code1
,
4656 enum tree_code code2
, int op_type
)
4659 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4660 gimple
*new_stmt1
, *new_stmt2
;
4661 vec
<tree
> vec_tmp
= vNULL
;
4663 vec_tmp
.create (vec_oprnds0
->length () * 2);
4664 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4666 if (op_type
== binary_op
)
4667 vop1
= (*vec_oprnds1
)[i
];
4671 /* Generate the two halves of promotion operation. */
4672 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4673 op_type
, vec_dest
, gsi
,
4675 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4676 op_type
, vec_dest
, gsi
,
4678 if (is_gimple_call (new_stmt1
))
4680 new_tmp1
= gimple_call_lhs (new_stmt1
);
4681 new_tmp2
= gimple_call_lhs (new_stmt2
);
4685 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4686 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4689 /* Store the results for the next step. */
4690 vec_tmp
.quick_push (new_tmp1
);
4691 vec_tmp
.quick_push (new_tmp2
);
4694 vec_oprnds0
->release ();
4695 *vec_oprnds0
= vec_tmp
;
4698 /* Create vectorized promotion stmts for widening stmts using only half the
4699 potential vector size for input. */
4701 vect_create_half_widening_stmts (vec_info
*vinfo
,
4702 vec
<tree
> *vec_oprnds0
,
4703 vec
<tree
> *vec_oprnds1
,
4704 stmt_vec_info stmt_info
, tree vec_dest
,
4705 gimple_stmt_iterator
*gsi
,
4706 enum tree_code code1
,
4714 vec
<tree
> vec_tmp
= vNULL
;
4716 vec_tmp
.create (vec_oprnds0
->length ());
4717 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4719 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
4721 gcc_assert (op_type
== binary_op
);
4722 vop1
= (*vec_oprnds1
)[i
];
4724 /* Widen the first vector input. */
4725 out_type
= TREE_TYPE (vec_dest
);
4726 new_tmp1
= make_ssa_name (out_type
);
4727 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4728 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
4729 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4731 /* Widen the second vector input. */
4732 new_tmp2
= make_ssa_name (out_type
);
4733 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4734 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4735 /* Perform the operation. With both vector inputs widened. */
4736 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4740 /* Perform the operation. With the single vector input widened. */
4741 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4744 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4745 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4746 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4748 /* Store the results for the next step. */
4749 vec_tmp
.quick_push (new_tmp3
);
4752 vec_oprnds0
->release ();
4753 *vec_oprnds0
= vec_tmp
;
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
4763 vectorizable_conversion (vec_info
*vinfo
,
4764 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4765 gimple
**vec_stmt
, slp_tree slp_node
,
4766 stmt_vector_for_cost
*cost_vec
)
4770 tree op0
, op1
= NULL_TREE
;
4771 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4772 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4773 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4775 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4777 poly_uint64 nunits_in
;
4778 poly_uint64 nunits_out
;
4779 tree vectype_out
, vectype_in
;
4781 tree lhs_type
, rhs_type
;
4782 enum { NARROW
, NONE
, WIDEN
} modifier
;
4783 vec
<tree
> vec_oprnds0
= vNULL
;
4784 vec
<tree
> vec_oprnds1
= vNULL
;
4786 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4787 int multi_step_cvt
= 0;
4788 vec
<tree
> interm_types
= vNULL
;
4789 tree intermediate_type
, cvt_type
= NULL_TREE
;
4791 unsigned short fltsz
;
4793 /* Is STMT a vectorizable conversion? */
4795 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4798 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4802 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4806 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4809 code
= gimple_assign_rhs_code (stmt
);
4810 if (!CONVERT_EXPR_CODE_P (code
)
4811 && code
!= FIX_TRUNC_EXPR
4812 && code
!= FLOAT_EXPR
4813 && code
!= WIDEN_PLUS_EXPR
4814 && code
!= WIDEN_MINUS_EXPR
4815 && code
!= WIDEN_MULT_EXPR
4816 && code
!= WIDEN_LSHIFT_EXPR
)
4819 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4820 || code
== WIDEN_MINUS_EXPR
4821 || code
== WIDEN_MULT_EXPR
4822 || code
== WIDEN_LSHIFT_EXPR
);
4823 op_type
= TREE_CODE_LENGTH (code
);
4825 /* Check types of lhs and rhs. */
4826 scalar_dest
= gimple_assign_lhs (stmt
);
4827 lhs_type
= TREE_TYPE (scalar_dest
);
4828 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4830 /* Check the operands of the operation. */
4831 slp_tree slp_op0
, slp_op1
= NULL
;
4832 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4833 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4835 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4837 "use not simple.\n");
4841 rhs_type
= TREE_TYPE (op0
);
4842 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4843 && !((INTEGRAL_TYPE_P (lhs_type
)
4844 && INTEGRAL_TYPE_P (rhs_type
))
4845 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4846 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4850 && ((INTEGRAL_TYPE_P (lhs_type
)
4851 && !type_has_mode_precision_p (lhs_type
))
4852 || (INTEGRAL_TYPE_P (rhs_type
)
4853 && !type_has_mode_precision_p (rhs_type
))))
4855 if (dump_enabled_p ())
4856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4857 "type conversion to/from bit-precision unsupported."
4862 if (op_type
== binary_op
)
4864 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4865 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4867 op1
= gimple_assign_rhs2 (stmt
);
4869 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4870 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4874 "use not simple.\n");
4877 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4880 vectype_in
= vectype1_in
;
4883 /* If op0 is an external or constant def, infer the vector type
4884 from the scalar type. */
4886 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4888 gcc_assert (vectype_in
);
4891 if (dump_enabled_p ())
4892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4893 "no vectype for scalar type %T\n", rhs_type
);
4898 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4899 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4903 "can't convert between boolean and non "
4904 "boolean vectors %T\n", rhs_type
);
4909 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4910 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4911 if (known_eq (nunits_out
, nunits_in
))
4916 else if (multiple_p (nunits_out
, nunits_in
))
4920 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4924 /* Multiple types in SLP are handled by creating the appropriate number of
4925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4929 else if (modifier
== NARROW
)
4930 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4932 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4934 /* Sanity check: make sure that at least one copy of the vectorized stmt
4935 needs to be generated. */
4936 gcc_assert (ncopies
>= 1);
4938 bool found_mode
= false;
4939 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4940 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4941 opt_scalar_mode rhs_mode_iter
;
4943 /* Supportable by target? */
4947 if (code
!= FIX_TRUNC_EXPR
4948 && code
!= FLOAT_EXPR
4949 && !CONVERT_EXPR_CODE_P (code
))
4951 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4955 if (dump_enabled_p ())
4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4957 "conversion not supported by target.\n");
4961 if (known_eq (nunits_in
, nunits_out
))
4963 if (!supportable_half_widening_operation (code
, vectype_out
,
4964 vectype_in
, &code1
))
4966 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4969 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
4970 vectype_out
, vectype_in
, &code1
,
4971 &code2
, &multi_step_cvt
,
4974 /* Binary widening operation can only be supported directly by the
4976 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4980 if (code
!= FLOAT_EXPR
4981 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4984 fltsz
= GET_MODE_SIZE (lhs_mode
);
4985 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4987 rhs_mode
= rhs_mode_iter
.require ();
4988 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4993 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4994 if (cvt_type
== NULL_TREE
)
4997 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4999 if (!supportable_convert_operation (code
, vectype_out
,
5000 cvt_type
, &codecvt1
))
5003 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
5004 vectype_out
, cvt_type
,
5005 &codecvt1
, &codecvt2
,
5010 gcc_assert (multi_step_cvt
== 0);
5012 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5014 vectype_in
, &code1
, &code2
,
5015 &multi_step_cvt
, &interm_types
))
5025 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5026 codecvt2
= ERROR_MARK
;
5030 interm_types
.safe_push (cvt_type
);
5031 cvt_type
= NULL_TREE
;
5036 gcc_assert (op_type
== unary_op
);
5037 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5038 &code1
, &multi_step_cvt
,
5042 if (code
!= FIX_TRUNC_EXPR
5043 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5047 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5048 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5049 if (cvt_type
== NULL_TREE
)
5051 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
5054 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5055 &code1
, &multi_step_cvt
,
5064 if (!vec_stmt
) /* transformation not required. */
5067 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5068 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5072 "incompatible vector types for invariants\n");
5075 DUMP_VECT_SCOPE ("vectorizable_conversion");
5076 if (modifier
== NONE
)
5078 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5079 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5082 else if (modifier
== NARROW
)
5084 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5085 /* The final packing step produces one vector result per copy. */
5086 unsigned int nvectors
5087 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5088 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5089 multi_step_cvt
, cost_vec
,
5094 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5095 /* The initial unpacking step produces two vector results
5096 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5097 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5098 unsigned int nvectors
5100 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5102 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5103 multi_step_cvt
, cost_vec
,
5106 interm_types
.release ();
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE
, vect_location
,
5113 "transform conversion. ncopies = %d.\n", ncopies
);
5115 if (op_type
== binary_op
)
5117 if (CONSTANT_CLASS_P (op0
))
5118 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5119 else if (CONSTANT_CLASS_P (op1
))
5120 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5123 /* In case of multi-step conversion, we first generate conversion operations
5124 to the intermediate types, and then from that types to the final one.
5125 We create vector destinations for the intermediate type (TYPES) received
5126 from supportable_*_operation, and store them in the correct order
5127 for future use in vect_create_vectorized_*_stmts (). */
5128 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5129 vec_dest
= vect_create_destination_var (scalar_dest
,
5130 (cvt_type
&& modifier
== WIDEN
)
5131 ? cvt_type
: vectype_out
);
5132 vec_dsts
.quick_push (vec_dest
);
5136 for (i
= interm_types
.length () - 1;
5137 interm_types
.iterate (i
, &intermediate_type
); i
--)
5139 vec_dest
= vect_create_destination_var (scalar_dest
,
5141 vec_dsts
.quick_push (vec_dest
);
5146 vec_dest
= vect_create_destination_var (scalar_dest
,
5148 ? vectype_out
: cvt_type
);
5153 if (modifier
== WIDEN
)
5155 else if (modifier
== NARROW
)
5158 ninputs
= vect_pow2 (multi_step_cvt
);
5166 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5168 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5170 /* Arguments are ready, create the new vector stmt. */
5171 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5172 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5173 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5174 gimple_assign_set_lhs (new_stmt
, new_temp
);
5175 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5178 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5180 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5185 /* In case the vectorization factor (VF) is bigger than the number
5186 of elements that we can fit in a vectype (nunits), we have to
5187 generate more than one vector stmt - i.e - we need to "unroll"
5188 the vector stmt by a factor VF/nunits. */
5189 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5191 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5193 if (code
== WIDEN_LSHIFT_EXPR
)
5195 int oprnds_size
= vec_oprnds0
.length ();
5196 vec_oprnds1
.create (oprnds_size
);
5197 for (i
= 0; i
< oprnds_size
; ++i
)
5198 vec_oprnds1
.quick_push (op1
);
5200 /* Arguments are ready. Create the new vector stmts. */
5201 for (i
= multi_step_cvt
; i
>= 0; i
--)
5203 tree this_dest
= vec_dsts
[i
];
5204 enum tree_code c1
= code1
, c2
= code2
;
5205 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5210 if (known_eq (nunits_out
, nunits_in
))
5211 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5212 &vec_oprnds1
, stmt_info
,
5216 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5217 &vec_oprnds1
, stmt_info
,
5222 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5227 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5228 new_temp
= make_ssa_name (vec_dest
);
5229 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5230 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5233 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5236 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5238 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5243 /* In case the vectorization factor (VF) is bigger than the number
5244 of elements that we can fit in a vectype (nunits), we have to
5245 generate more than one vector stmt - i.e - we need to "unroll"
5246 the vector stmt by a factor VF/nunits. */
5247 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5249 /* Arguments are ready. Create the new vector stmts. */
5251 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5253 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5254 new_temp
= make_ssa_name (vec_dest
);
5256 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5257 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5258 vec_oprnds0
[i
] = new_temp
;
5261 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5263 stmt_info
, vec_dsts
, gsi
,
5268 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5270 vec_oprnds0
.release ();
5271 vec_oprnds1
.release ();
5272 interm_types
.release ();
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
5282 vect_nop_conversion_p (stmt_vec_info stmt_info
)
5284 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5288 tree lhs
= gimple_assign_lhs (stmt
);
5289 tree_code code
= gimple_assign_rhs_code (stmt
);
5290 tree rhs
= gimple_assign_rhs1 (stmt
);
5292 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
5295 if (CONVERT_EXPR_CODE_P (code
))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
5301 /* Function vectorizable_assignment.
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
5309 vectorizable_assignment (vec_info
*vinfo
,
5310 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5311 gimple
**vec_stmt
, slp_tree slp_node
,
5312 stmt_vector_for_cost
*cost_vec
)
5317 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5319 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5323 vec
<tree
> vec_oprnds
= vNULL
;
5325 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5326 enum tree_code code
;
5329 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5332 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5336 /* Is vectorizable assignment? */
5337 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5341 scalar_dest
= gimple_assign_lhs (stmt
);
5342 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5345 if (STMT_VINFO_DATA_REF (stmt_info
))
5348 code
= gimple_assign_rhs_code (stmt
);
5349 if (!(gimple_assign_single_p (stmt
)
5350 || code
== PAREN_EXPR
5351 || CONVERT_EXPR_CODE_P (code
)))
5354 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5355 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5357 /* Multiple types in SLP are handled by creating the appropriate number of
5358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5363 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5365 gcc_assert (ncopies
>= 1);
5368 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5369 &dt
[0], &vectype_in
))
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5373 "use not simple.\n");
5377 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5379 /* We can handle NOP_EXPR conversions that do not change the number
5380 of elements or the vector size. */
5381 if ((CONVERT_EXPR_CODE_P (code
)
5382 || code
== VIEW_CONVERT_EXPR
)
5384 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5385 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5386 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5389 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5390 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5394 "can't convert between boolean and non "
5395 "boolean vectors %T\n", TREE_TYPE (op
));
5400 /* We do not handle bit-precision changes. */
5401 if ((CONVERT_EXPR_CODE_P (code
)
5402 || code
== VIEW_CONVERT_EXPR
)
5403 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5404 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5405 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5406 /* But a conversion that does not change the bit-pattern is ok. */
5407 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5408 > TYPE_PRECISION (TREE_TYPE (op
)))
5409 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5413 "type conversion to/from bit-precision "
5418 if (!vec_stmt
) /* transformation not required. */
5421 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5425 "incompatible vector types for invariants\n");
5428 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info
))
5431 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5441 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5444 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5446 /* Arguments are ready. create the new vector stmt. */
5447 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5449 if (CONVERT_EXPR_CODE_P (code
)
5450 || code
== VIEW_CONVERT_EXPR
)
5451 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5452 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5453 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5454 gimple_assign_set_lhs (new_stmt
, new_temp
);
5455 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5457 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5459 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5462 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5464 vec_oprnds
.release ();
5469 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5470 either as shift by a scalar or by a vector. */
5473 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5476 machine_mode vec_mode
;
5481 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5485 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5487 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5489 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5491 || (optab_handler (optab
, TYPE_MODE (vectype
))
5492 == CODE_FOR_nothing
))
5496 vec_mode
= TYPE_MODE (vectype
);
5497 icode
= (int) optab_handler (optab
, vec_mode
);
5498 if (icode
== CODE_FOR_nothing
)
5505 /* Function vectorizable_shift.
5507 Check if STMT_INFO performs a shift operation that can be vectorized.
5508 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5509 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5510 Return true if STMT_INFO is vectorizable in this way. */
5513 vectorizable_shift (vec_info
*vinfo
,
5514 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5515 gimple
**vec_stmt
, slp_tree slp_node
,
5516 stmt_vector_for_cost
*cost_vec
)
5520 tree op0
, op1
= NULL
;
5521 tree vec_oprnd1
= NULL_TREE
;
5523 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5524 enum tree_code code
;
5525 machine_mode vec_mode
;
5529 machine_mode optab_op2_mode
;
5530 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5532 poly_uint64 nunits_in
;
5533 poly_uint64 nunits_out
;
5538 vec
<tree
> vec_oprnds0
= vNULL
;
5539 vec
<tree
> vec_oprnds1
= vNULL
;
5542 bool scalar_shift_arg
= true;
5543 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5544 bool incompatible_op1_vectype_p
= false;
5546 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5549 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5550 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5554 /* Is STMT a vectorizable binary/unary operation? */
5555 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5559 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5562 code
= gimple_assign_rhs_code (stmt
);
5564 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5565 || code
== RROTATE_EXPR
))
5568 scalar_dest
= gimple_assign_lhs (stmt
);
5569 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5570 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5574 "bit-precision shifts not supported.\n");
5579 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5580 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5584 "use not simple.\n");
5587 /* If op0 is an external or constant def, infer the vector type
5588 from the scalar type. */
5590 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5592 gcc_assert (vectype
);
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5597 "no vectype for scalar type\n");
5601 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5602 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5603 if (maybe_ne (nunits_out
, nunits_in
))
5606 stmt_vec_info op1_def_stmt_info
;
5608 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5609 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5613 "use not simple.\n");
5617 /* Multiple types in SLP are handled by creating the appropriate number of
5618 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5623 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5625 gcc_assert (ncopies
>= 1);
5627 /* Determine whether the shift amount is a vector, or scalar. If the
5628 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5630 if ((dt
[1] == vect_internal_def
5631 || dt
[1] == vect_induction_def
5632 || dt
[1] == vect_nested_cycle
)
5634 scalar_shift_arg
= false;
5635 else if (dt
[1] == vect_constant_def
5636 || dt
[1] == vect_external_def
5637 || dt
[1] == vect_internal_def
)
5639 /* In SLP, need to check whether the shift count is the same,
5640 in loops if it is a constant or invariant, it is always
5644 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5645 stmt_vec_info slpstmt_info
;
5647 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5649 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5650 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5651 scalar_shift_arg
= false;
5654 /* For internal SLP defs we have to make sure we see scalar stmts
5655 for all vector elements.
5656 ??? For different vectors we could resort to a different
5657 scalar shift operand but code-generation below simply always
5659 if (dt
[1] == vect_internal_def
5660 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5662 scalar_shift_arg
= false;
5665 /* If the shift amount is computed by a pattern stmt we cannot
5666 use the scalar amount directly thus give up and use a vector
5668 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5669 scalar_shift_arg
= false;
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5675 "operand mode requires invariant argument.\n");
5679 /* Vector shifted by vector. */
5680 bool was_scalar_shift_arg
= scalar_shift_arg
;
5681 if (!scalar_shift_arg
)
5683 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5684 if (dump_enabled_p ())
5685 dump_printf_loc (MSG_NOTE
, vect_location
,
5686 "vector/vector shift/rotate found.\n");
5689 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5691 incompatible_op1_vectype_p
5692 = (op1_vectype
== NULL_TREE
5693 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5694 TYPE_VECTOR_SUBPARTS (vectype
))
5695 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5696 if (incompatible_op1_vectype_p
5698 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5699 || slp_op1
->refcnt
!= 1))
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5703 "unusable type for last operand in"
5704 " vector/vector shift/rotate.\n");
5708 /* See if the machine has a vector shifted by scalar insn and if not
5709 then see if it has a vector shifted by vector insn. */
5712 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5714 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE
, vect_location
,
5718 "vector/scalar shift/rotate found.\n");
5722 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5724 && (optab_handler (optab
, TYPE_MODE (vectype
))
5725 != CODE_FOR_nothing
))
5727 scalar_shift_arg
= false;
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE
, vect_location
,
5731 "vector/vector shift/rotate found.\n");
5734 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5738 /* Unlike the other binary operators, shifts/rotates have
5739 the rhs being int, instead of the same type as the lhs,
5740 so make sure the scalar is the right type if we are
5741 dealing with vectors of long long/long/short/char. */
5742 incompatible_op1_vectype_p
5744 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5746 if (incompatible_op1_vectype_p
5747 && dt
[1] == vect_internal_def
)
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5751 "unusable type for last operand in"
5752 " vector/vector shift/rotate.\n");
5759 /* Supportable by target? */
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5767 vec_mode
= TYPE_MODE (vectype
);
5768 icode
= (int) optab_handler (optab
, vec_mode
);
5769 if (icode
== CODE_FOR_nothing
)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5773 "op not supported by target.\n");
5776 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5777 if (vect_emulated_vector_p (vectype
))
5780 if (!vec_stmt
) /* transformation not required. */
5783 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5784 || ((!scalar_shift_arg
|| dt
[1] == vect_internal_def
)
5785 && (!incompatible_op1_vectype_p
5786 || dt
[1] == vect_constant_def
)
5787 && !vect_maybe_update_slp_op_vectype
5789 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5793 "incompatible vector types for invariants\n");
5796 /* Now adjust the constant shift amount in place. */
5798 && incompatible_op1_vectype_p
5799 && dt
[1] == vect_constant_def
)
5801 for (unsigned i
= 0;
5802 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5804 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5805 = fold_convert (TREE_TYPE (vectype
),
5806 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5807 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5811 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5812 DUMP_VECT_SCOPE ("vectorizable_shift");
5813 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5814 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_NOTE
, vect_location
,
5822 "transform binary/unary operation.\n");
5824 if (incompatible_op1_vectype_p
&& !slp_node
)
5826 gcc_assert (!scalar_shift_arg
&& was_scalar_shift_arg
);
5827 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5828 if (dt
[1] != vect_constant_def
)
5829 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5830 TREE_TYPE (vectype
), NULL
);
5834 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5836 if (scalar_shift_arg
&& dt
[1] != vect_internal_def
)
5838 /* Vector shl and shr insn patterns can be defined with scalar
5839 operand 2 (shift operand). In this case, use constant or loop
5840 invariant op1 directly, without extending it to vector mode
5842 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5843 if (!VECTOR_MODE_P (optab_op2_mode
))
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_NOTE
, vect_location
,
5847 "operand 1 using scalar mode.\n");
5849 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5850 vec_oprnds1
.quick_push (vec_oprnd1
);
5851 /* Store vec_oprnd1 for every vector stmt to be created.
5852 We check during the analysis that all the shift arguments
5854 TODO: Allow different constants for different vector
5855 stmts generated for an SLP instance. */
5857 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5858 vec_oprnds1
.quick_push (vec_oprnd1
);
5861 else if (!scalar_shift_arg
&& slp_node
&& incompatible_op1_vectype_p
)
5863 if (was_scalar_shift_arg
)
5865 /* If the argument was the same in all lanes create
5866 the correctly typed vector shift amount directly. */
5867 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5868 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5869 !loop_vinfo
? gsi
: NULL
);
5870 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5871 !loop_vinfo
? gsi
: NULL
);
5872 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5873 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5874 vec_oprnds1
.quick_push (vec_oprnd1
);
5876 else if (dt
[1] == vect_constant_def
)
5877 /* The constant shift amount has been adjusted in place. */
5880 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5883 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5884 (a special case for certain kind of vector shifts); otherwise,
5885 operand 1 should be of a vector type (the usual case). */
5886 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5888 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5890 /* Arguments are ready. Create the new vector stmt. */
5891 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5893 /* For internal defs where we need to use a scalar shift arg
5894 extract the first lane. */
5895 if (scalar_shift_arg
&& dt
[1] == vect_internal_def
)
5897 vop1
= vec_oprnds1
[0];
5898 new_temp
= make_ssa_name (TREE_TYPE (TREE_TYPE (vop1
)));
5900 = gimple_build_assign (new_temp
,
5901 build3 (BIT_FIELD_REF
, TREE_TYPE (new_temp
),
5903 TYPE_SIZE (TREE_TYPE (new_temp
)),
5904 bitsize_zero_node
));
5905 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5909 vop1
= vec_oprnds1
[i
];
5910 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5911 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5912 gimple_assign_set_lhs (new_stmt
, new_temp
);
5913 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5915 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5917 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5921 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5923 vec_oprnds0
.release ();
5924 vec_oprnds1
.release ();
5930 /* Function vectorizable_operation.
5932 Check if STMT_INFO performs a binary, unary or ternary operation that can
5934 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5935 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5936 Return true if STMT_INFO is vectorizable in this way. */
5939 vectorizable_operation (vec_info
*vinfo
,
5940 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5941 gimple
**vec_stmt
, slp_tree slp_node
,
5942 stmt_vector_for_cost
*cost_vec
)
5946 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5948 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5949 enum tree_code code
, orig_code
;
5950 machine_mode vec_mode
;
5954 bool target_support_p
;
5955 enum vect_def_type dt
[3]
5956 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5958 poly_uint64 nunits_in
;
5959 poly_uint64 nunits_out
;
5961 int ncopies
, vec_num
;
5963 vec
<tree
> vec_oprnds0
= vNULL
;
5964 vec
<tree
> vec_oprnds1
= vNULL
;
5965 vec
<tree
> vec_oprnds2
= vNULL
;
5966 tree vop0
, vop1
, vop2
;
5967 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5969 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5972 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5976 /* Is STMT a vectorizable binary/unary operation? */
5977 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5981 /* Loads and stores are handled in vectorizable_{load,store}. */
5982 if (STMT_VINFO_DATA_REF (stmt_info
))
5985 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5987 /* Shifts are handled in vectorizable_shift. */
5988 if (code
== LSHIFT_EXPR
5989 || code
== RSHIFT_EXPR
5990 || code
== LROTATE_EXPR
5991 || code
== RROTATE_EXPR
)
5994 /* Comparisons are handled in vectorizable_comparison. */
5995 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5998 /* Conditions are handled in vectorizable_condition. */
5999 if (code
== COND_EXPR
)
6002 /* For pointer addition and subtraction, we should use the normal
6003 plus and minus for the vector operation. */
6004 if (code
== POINTER_PLUS_EXPR
)
6006 if (code
== POINTER_DIFF_EXPR
)
6009 /* Support only unary or binary operations. */
6010 op_type
= TREE_CODE_LENGTH (code
);
6011 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6015 "num. args = %d (not unary/binary/ternary op).\n",
6020 scalar_dest
= gimple_assign_lhs (stmt
);
6021 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
6023 /* Most operations cannot handle bit-precision types without extra
6025 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
6027 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
6028 /* Exception are bitwise binary operations. */
6029 && code
!= BIT_IOR_EXPR
6030 && code
!= BIT_XOR_EXPR
6031 && code
!= BIT_AND_EXPR
)
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6035 "bit-precision arithmetic not supported.\n");
6040 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6041 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6045 "use not simple.\n");
6048 /* If op0 is an external or constant def, infer the vector type
6049 from the scalar type. */
6052 /* For boolean type we cannot determine vectype by
6053 invariant value (don't know whether it is a vector
6054 of booleans or vector of integers). We use output
6055 vectype because operations on boolean don't change
6057 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
6059 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6063 "not supported operation on bool value.\n");
6066 vectype
= vectype_out
;
6069 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
6073 gcc_assert (vectype
);
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6078 "no vectype for scalar type %T\n",
6084 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
6085 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
6086 if (maybe_ne (nunits_out
, nunits_in
))
6089 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
6090 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
6091 if (op_type
== binary_op
|| op_type
== ternary_op
)
6093 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6094 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6098 "use not simple.\n");
6102 if (op_type
== ternary_op
)
6104 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
6105 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6109 "use not simple.\n");
6114 /* Multiple types in SLP are handled by creating the appropriate number of
6115 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6120 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6124 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6128 gcc_assert (ncopies
>= 1);
6130 /* Reject attempts to combine mask types with nonmask types, e.g. if
6131 we have an AND between a (nonmask) boolean loaded from memory and
6132 a (mask) boolean result of a comparison.
6134 TODO: We could easily fix these cases up using pattern statements. */
6135 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
6136 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
6137 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6141 "mixed mask and nonmask vector types\n");
6145 /* Supportable by target? */
6147 vec_mode
= TYPE_MODE (vectype
);
6148 if (code
== MULT_HIGHPART_EXPR
)
6149 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
6152 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6160 target_support_p
= (optab_handler (optab
, vec_mode
)
6161 != CODE_FOR_nothing
);
6164 bool using_emulated_vectors_p
= vect_emulated_vector_p (vectype
);
6165 if (!target_support_p
)
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6169 "op not supported by target.\n");
6170 /* Check only during analysis. */
6171 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
6172 || (!vec_stmt
&& !vect_can_vectorize_without_simd_p (code
)))
6174 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_NOTE
, vect_location
,
6176 "proceeding using word mode.\n");
6177 using_emulated_vectors_p
= true;
6180 if (using_emulated_vectors_p
6181 && !vect_can_vectorize_without_simd_p (code
))
6183 if (dump_enabled_p ())
6184 dump_printf (MSG_NOTE
, "using word mode not possible.\n");
6188 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
6189 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
6190 internal_fn cond_fn
= get_conditional_internal_fn (code
);
6192 if (!vec_stmt
) /* transformation not required. */
6194 /* If this operation is part of a reduction, a fully-masked loop
6195 should only change the active lanes of the reduction chain,
6196 keeping the inactive lanes as-is. */
6198 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
6201 if (cond_fn
== IFN_LAST
6202 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
6203 OPTIMIZE_FOR_SPEED
))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6207 "can't use a fully-masked loop because no"
6208 " conditional operation is available.\n");
6209 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
6212 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
6216 /* Put types on constant and invariant SLP children. */
6218 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
6219 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
6220 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6224 "incompatible vector types for invariants\n");
6228 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
6229 DUMP_VECT_SCOPE ("vectorizable_operation");
6230 vect_model_simple_cost (vinfo
, stmt_info
,
6231 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
6232 if (using_emulated_vectors_p
)
6234 /* The above vect_model_simple_cost call handles constants
6235 in the prologue and (mis-)costs one of the stmts as
6236 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6237 for the actual lowering that will be applied. */
6239 = slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
;
6253 record_stmt_cost (cost_vec
, n
, scalar_stmt
, stmt_info
, 0, vect_body
);
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_NOTE
, vect_location
,
6262 "transform binary/unary operation.\n");
6264 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
6266 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6267 vectors with unsigned elements, but the result is signed. So, we
6268 need to compute the MINUS_EXPR into vectype temporary and
6269 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6270 tree vec_cvt_dest
= NULL_TREE
;
6271 if (orig_code
== POINTER_DIFF_EXPR
)
6273 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6274 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6278 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
6280 /* In case the vectorization factor (VF) is bigger than the number
6281 of elements that we can fit in a vectype (nunits), we have to generate
6282 more than one vector stmt - i.e - we need to "unroll" the
6283 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6284 from one copy of the vector stmt to the next, in the field
6285 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6286 stages to find the correct vector defs to be used when vectorizing
6287 stmts that use the defs of the current stmt. The example below
6288 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6289 we need to create 4 vectorized stmts):
6291 before vectorization:
6292 RELATED_STMT VEC_STMT
6296 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6298 RELATED_STMT VEC_STMT
6299 VS1_0: vx0 = memref0 VS1_1 -
6300 VS1_1: vx1 = memref1 VS1_2 -
6301 VS1_2: vx2 = memref2 VS1_3 -
6302 VS1_3: vx3 = memref3 - -
6303 S1: x = load - VS1_0
6306 step2: vectorize stmt S2 (done here):
6307 To vectorize stmt S2 we first need to find the relevant vector
6308 def for the first operand 'x'. This is, as usual, obtained from
6309 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6310 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6311 relevant vector def 'vx0'. Having found 'vx0' we can generate
6312 the vector stmt VS2_0, and as usual, record it in the
6313 STMT_VINFO_VEC_STMT of stmt S2.
6314 When creating the second copy (VS2_1), we obtain the relevant vector
6315 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6316 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6317 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6318 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6319 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6320 chain of stmts and pointers:
6321 RELATED_STMT VEC_STMT
6322 VS1_0: vx0 = memref0 VS1_1 -
6323 VS1_1: vx1 = memref1 VS1_2 -
6324 VS1_2: vx2 = memref2 VS1_3 -
6325 VS1_3: vx3 = memref3 - -
6326 S1: x = load - VS1_0
6327 VS2_0: vz0 = vx0 + v1 VS2_1 -
6328 VS2_1: vz1 = vx1 + v1 VS2_2 -
6329 VS2_2: vz2 = vx2 + v1 VS2_3 -
6330 VS2_3: vz3 = vx3 + v1 - -
6331 S2: z = x + 1 - VS2_0 */
6333 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
6334 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
6335 /* Arguments are ready. Create the new vector stmt. */
6336 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6338 gimple
*new_stmt
= NULL
;
6339 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6340 ? vec_oprnds1
[i
] : NULL_TREE
);
6341 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
6342 if (masked_loop_p
&& reduc_idx
>= 0)
6344 /* Perform the operation on active elements only and take
6345 inactive elements from the reduction chain input. */
6347 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
6348 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6350 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
6352 new_temp
= make_ssa_name (vec_dest
, call
);
6353 gimple_call_set_lhs (call
, new_temp
);
6354 gimple_call_set_nothrow (call
, true);
6355 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6360 tree mask
= NULL_TREE
;
6361 /* When combining two masks check if either of them is elsewhere
6362 combined with a loop mask, if that's the case we can mark that the
6363 new combined mask doesn't need to be combined with a loop mask. */
6365 && code
== BIT_AND_EXPR
6366 && VECTOR_BOOLEAN_TYPE_P (vectype
))
6368 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op0
,
6371 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6374 vop0
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6378 if (loop_vinfo
->scalar_cond_masked_set
.contains ({ op1
,
6381 mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
6384 vop1
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
6389 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6390 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6391 gimple_assign_set_lhs (new_stmt
, new_temp
);
6392 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6394 /* Enter the combined value into the vector cond hash so we don't
6395 AND it with a loop mask again. */
6397 loop_vinfo
->vec_cond_masked_set
.add ({ new_temp
, mask
});
6401 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6402 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6404 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6405 gimple_assign_set_lhs (new_stmt
, new_temp
);
6406 vect_finish_stmt_generation (vinfo
, stmt_info
,
6411 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6413 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6417 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6419 vec_oprnds0
.release ();
6420 vec_oprnds1
.release ();
6421 vec_oprnds2
.release ();
6426 /* A helper function to ensure data reference DR_INFO's base alignment. */
6429 ensure_base_align (dr_vec_info
*dr_info
)
6431 /* Alignment is only analyzed for the first element of a DR group,
6432 use that to look at base alignment we need to enforce. */
6433 if (STMT_VINFO_GROUPED_ACCESS (dr_info
->stmt
))
6434 dr_info
= STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info
->stmt
));
6436 gcc_assert (dr_info
->misalignment
!= DR_MISALIGNMENT_UNINITIALIZED
);
6438 if (dr_info
->base_misaligned
)
6440 tree base_decl
= dr_info
->base_decl
;
6442 // We should only be able to increase the alignment of a base object if
6443 // we know what its new alignment should be at compile time.
6444 unsigned HOST_WIDE_INT align_base_to
=
6445 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6447 if (decl_in_symtab_p (base_decl
))
6448 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6449 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6451 SET_DECL_ALIGN (base_decl
, align_base_to
);
6452 DECL_USER_ALIGN (base_decl
) = 1;
6454 dr_info
->base_misaligned
= false;
6459 /* Function get_group_alias_ptr_type.
6461 Return the alias type for the group starting at FIRST_STMT_INFO. */
6464 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6466 struct data_reference
*first_dr
, *next_dr
;
6468 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6469 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6470 while (next_stmt_info
)
6472 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6473 if (get_alias_set (DR_REF (first_dr
))
6474 != get_alias_set (DR_REF (next_dr
)))
6476 if (dump_enabled_p ())
6477 dump_printf_loc (MSG_NOTE
, vect_location
,
6478 "conflicting alias set types.\n");
6479 return ptr_type_node
;
6481 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6483 return reference_alias_ptr_type (DR_REF (first_dr
));
6487 /* Function scan_operand_equal_p.
6489 Helper function for check_scan_store. Compare two references
6490 with .GOMP_SIMD_LANE bases. */
6493 scan_operand_equal_p (tree ref1
, tree ref2
)
6495 tree ref
[2] = { ref1
, ref2
};
6496 poly_int64 bitsize
[2], bitpos
[2];
6497 tree offset
[2], base
[2];
/* Phase 1: decompose each reference into base, offset and bit
   position/size; reject reverse storage order, volatile accesses and
   non-zero bit positions.  */
6498 for (int i
= 0; i
< 2; ++i
)
6501 int unsignedp
, reversep
, volatilep
= 0;
6502 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6503 &offset
[i
], &mode
, &unsignedp
,
6504 &reversep
, &volatilep
);
6505 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
/* Peel a MEM_REF whose address SSA name is defined as
   ADDR_EXPR p+ SSA_NAME back into the underlying variable as base
   plus that SSA name as offset, so both references compare in the
   same base + offset form.  */
6507 if (TREE_CODE (base
[i
]) == MEM_REF
6508 && offset
[i
] == NULL_TREE
6509 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6511 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6512 if (is_gimple_assign (def_stmt
)
6513 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6514 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6515 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6517 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6519 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6520 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
/* Phase 2: bases and access sizes must agree; if the offset trees are
   not identical pointers, both must be present and structurally
   comparable.  */
6525 if (!operand_equal_p (base
[0], base
[1], 0))
6527 if (maybe_ne (bitsize
[0], bitsize
[1]))
6529 if (offset
[0] != offset
[1])
6531 if (!offset
[0] || !offset
[1])
6533 if (!operand_equal_p (offset
[0], offset
[1], 0))
/* Phase 3: normalize each offset — factor a multiplier out of
   offset * step forms (either through the defining MULT_EXPR of an
   SSA name or a direct MULT_EXPR) and look through integer
   conversions that do not narrow the precision.  */
6536 for (int i
= 0; i
< 2; ++i
)
6538 step
[i
] = integer_one_node
;
6539 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6541 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6542 if (is_gimple_assign (def_stmt
)
6543 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6544 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6547 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6548 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6551 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6553 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6554 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6556 tree rhs1
= NULL_TREE
;
6557 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6559 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6560 if (gimple_assign_cast_p (def_stmt
))
6561 rhs1
= gimple_assign_rhs1 (def_stmt
);
6563 else if (CONVERT_EXPR_P (offset
[i
]))
6564 rhs1
= TREE_OPERAND (offset
[i
], 0);
6566 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6567 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6568 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6569 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
/* After normalization the offsets and the factored steps of the two
   references must match exactly.  */
6572 if (!operand_equal_p (offset
[0], offset
[1], 0)
6573 || !operand_equal_p (step
[0], step
[1], 0))
/* How one step of an OpenMP scan store can be implemented.  Produced by
   scan_store_can_perm_p for each step and consumed by
   vectorizable_scan_store when emitting the vectorized sequence.  */
6581 enum scan_store_kind
{
6582 /* Normal permutation. */
6583 scan_store_kind_perm
,
6585 /* Whole vector left shift permutation with zero init. */
6586 scan_store_kind_lshift_zero
,
6588 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6589 scan_store_kind_lshift_cond
6592 /* Function scan_store_can_perm_p.
6594 Verify if we can perform the needed permutations or whole vector shifts.
6595 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6596 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6597 to do at each step. */
6600 scan_store_can_perm_p (tree vectype
, tree init
,
6601 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6603 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6604 unsigned HOST_WIDE_INT nunits
;
/* Only constant-length vectors with a power-of-two (> 1) number of
   elements are handled.  */
6605 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6607 int units_log2
= exact_log2 (nunits
);
6608 if (units_log2
<= 0)
6612 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
/* Build and test one permutation per scan step.  The final iteration
   (i == units_log2) is the broadcast of the last element; the earlier
   iterations shift the accumulating vector by 2**i lanes.  */
6613 for (i
= 0; i
<= units_log2
; ++i
)
6615 unsigned HOST_WIDE_INT j
, k
;
6616 enum scan_store_kind kind
= scan_store_kind_perm
;
6617 vec_perm_builder
sel (nunits
, nunits
, 1);
6618 sel
.quick_grow (nunits
);
6619 if (i
== units_log2
)
6621 for (j
= 0; j
< nunits
; ++j
)
6622 sel
[j
] = nunits
- 1;
6626 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6628 for (k
= 0; j
< nunits
; ++j
, ++k
)
6629 sel
[j
] = nunits
+ k
;
6631 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
/* If the target cannot do this permutation directly, fall back to a
   whole-vector shift, possibly with a VEC_COND_EXPR to patch in INIT
   elements when INIT is not an all-zeros constant.  */
6632 if (!can_vec_perm_const_p (vec_mode
, indices
))
6634 if (i
== units_log2
)
6637 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6639 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6641 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6642 /* Whole vector shifts shift in zeros, so if init is all zero
6643 constant, there is no need to do anything further. */
6644 if ((TREE_CODE (init
) != INTEGER_CST
6645 && TREE_CODE (init
) != REAL_CST
)
6646 || !initializer_zerop (init
))
6648 tree masktype
= truth_type_for (vectype
);
6649 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6651 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6654 kind
= whole_vector_shift_kind
;
/* Record the chosen kind per step, lazily growing the vector the first
   time a non-permutation kind appears.  */
6656 if (use_whole_vector
)
6658 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6659 use_whole_vector
->safe_grow_cleared (i
, true);
6660 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6661 use_whole_vector
->safe_push (kind
);
6669 /* Function check_scan_store.
6671 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6674 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6675 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6676 vect_memory_access_type memory_access_type
)
6678 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6679 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6682 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
/* Bail out unless the store is a plain contiguous, ungrouped, zero-offset
   access to a variable inside a loop that is not fully masked.  */
6685 || memory_access_type
!= VMAT_CONTIGUOUS
6686 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6687 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6688 || loop_vinfo
== NULL
6689 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6690 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6691 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6692 || !integer_zerop (DR_INIT (dr_info
->dr
))
6693 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6694 || !alias_sets_conflict_p (get_alias_set (vectype
),
6695 get_alias_set (TREE_TYPE (ref_type
))))
6697 if (dump_enabled_p ())
6698 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6699 "unsupported OpenMP scan store.\n");
6703 /* We need to pattern match code built by OpenMP lowering and simplified
6704 by following optimizations into something we can handle.
6705 #pragma omp simd reduction(inscan,+:r)
6709 #pragma omp scan inclusive (r)
6712 shall have body with:
6713 // Initialization for input phase, store the reduction initializer:
6714 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6715 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6717 // Actual input phase:
6719 r.0_5 = D.2042[_20];
6722 // Initialization for scan phase:
6723 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6729 // Actual scan phase:
6731 r.1_8 = D.2042[_20];
6733 The "omp simd array" variable D.2042 holds the privatized copy used
6734 inside of the loop and D.2043 is another one that holds copies of
6735 the current original list item. The separate GOMP_SIMD_LANE ifn
6736 kinds are there in order to allow optimizing the initializer store
6737 and combiner sequence, e.g. if it is originally some C++ish user
6738 defined reduction, but allow the vectorizer to pattern recognize it
6739 and turn into the appropriate vectorized scan.
6741 For exclusive scan, this is slightly different:
6742 #pragma omp simd reduction(inscan,+:r)
6746 #pragma omp scan exclusive (r)
6749 shall have body with:
6750 // Initialization for input phase, store the reduction initializer:
6751 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6752 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6754 // Actual input phase:
6756 r.0_5 = D.2042[_20];
6759 // Initialization for scan phase:
6760 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6766 // Actual scan phase:
6768 r.1_8 = D.2044[_20];
6771 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6773 /* Match the D.2042[_21] = 0; store above. Just require that
6774 it is a constant or external definition store. */
6775 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6778 if (dump_enabled_p ())
6779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6780 "unsupported OpenMP scan initializer store.\n");
/* Remember the initializer rhs per variable in the scan_map; it is
   looked up again below and by vectorizable_scan_store.  */
6784 if (! loop_vinfo
->scan_map
)
6785 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6786 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6787 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6790 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6792 /* These stores can be vectorized normally. */
6796 if (rhs_dt
!= vect_internal_def
)
6799 if (dump_enabled_p ())
6800 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6801 "unsupported OpenMP scan combiner pattern.\n");
/* From here on, verify the combiner pattern: the stored rhs must be
   produced by a commutative binary operation whose two operands are
   loads from "omp simd array" variables in the same basic block.  */
6805 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6806 tree rhs
= gimple_assign_rhs1 (stmt
);
6807 if (TREE_CODE (rhs
) != SSA_NAME
)
6810 gimple
*other_store_stmt
= NULL
;
6811 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6812 bool inscan_var_store
6813 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6815 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6817 if (!inscan_var_store
)
6819 use_operand_p use_p
;
6820 imm_use_iterator iter
;
6821 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6823 gimple
*use_stmt
= USE_STMT (use_p
);
6824 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6826 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6827 || !is_gimple_assign (use_stmt
)
6828 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6830 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6832 other_store_stmt
= use_stmt
;
6834 if (other_store_stmt
== NULL
)
6836 rhs
= gimple_assign_lhs (other_store_stmt
);
6837 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6841 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6843 use_operand_p use_p
;
6844 imm_use_iterator iter
;
6845 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6847 gimple
*use_stmt
= USE_STMT (use_p
);
6848 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6850 if (other_store_stmt
)
6852 other_store_stmt
= use_stmt
;
6858 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6859 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6860 || !is_gimple_assign (def_stmt
)
6861 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6864 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6865 /* For pointer addition, we should use the normal plus for the vector
6869 case POINTER_PLUS_EXPR
:
6872 case MULT_HIGHPART_EXPR
:
6877 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6880 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6881 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6882 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6885 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6886 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6887 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6888 || !gimple_assign_load_p (load1_stmt
)
6889 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6890 || !gimple_assign_load_p (load2_stmt
))
6893 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6894 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6895 if (load1_stmt_info
== NULL
6896 || load2_stmt_info
== NULL
6897 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6898 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6899 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6900 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6903 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6905 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6906 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6907 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6909 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6911 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6915 use_operand_p use_p
;
6916 imm_use_iterator iter
;
6917 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6919 gimple
*use_stmt
= USE_STMT (use_p
);
6920 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6922 if (other_store_stmt
)
6924 other_store_stmt
= use_stmt
;
6928 if (other_store_stmt
== NULL
)
6930 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6931 || !gimple_store_p (other_store_stmt
))
6934 stmt_vec_info other_store_stmt_info
6935 = loop_vinfo
->lookup_stmt (other_store_stmt
)
;
6936 if (other_store_stmt_info
== NULL
6937 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6938 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
/* Pair up the two stores with the two loads: stmt1 must feed the same
   location load1 reads; swap operands/loads if the first match is with
   load2 instead.  */
6941 gimple
*stmt1
= stmt
;
6942 gimple
*stmt2
= other_store_stmt
;
6943 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6944 std::swap (stmt1
, stmt2
);
6945 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6946 gimple_assign_rhs1 (load2_stmt
)))
6948 std::swap (rhs1
, rhs2
);
6949 std::swap (load1_stmt
, load2_stmt
);
6950 std::swap (load1_stmt_info
, load2_stmt_info
);
6952 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6953 gimple_assign_rhs1 (load1_stmt
)))
6956 tree var3
= NULL_TREE
;
6957 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6958 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6959 gimple_assign_rhs1 (load2_stmt
)))
6961 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6963 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6964 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6965 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6967 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6968 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6969 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6970 || lookup_attribute ("omp simd inscan exclusive",
6971 DECL_ATTRIBUTES (var3
)))
6975 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6976 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6977 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
/* Both stored-to variables must be "omp simd array"s and exactly one of
   them must carry the "omp simd inscan" attribute; var1 is canonicalized
   to be the non-inscan one.  */
6980 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6981 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6982 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6983 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6984 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6985 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6988 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6989 std::swap (var1
, var2
);
6991 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6993 if (!lookup_attribute ("omp simd inscan exclusive",
6994 DECL_ATTRIBUTES (var1
)))
/* The reduction initializer must have been recorded earlier (by the
   SIMD_LANE_ACCESS == 2 path above).  */
6999 if (loop_vinfo
->scan_map
== NULL
)
7001 tree
*init
= loop_vinfo
->scan_map
->get (var1
)
;
7005 /* The IL is as expected, now check if we can actually vectorize it.
7012 should be vectorized as (where _40 is the vectorized rhs
7013 from the D.2042[_21] = 0; store):
7014 _30 = MEM <vector(8) int> [(int *)&D.2043];
7015 _31 = MEM <vector(8) int> [(int *)&D.2042];
7016 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7018 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7019 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7021 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7022 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7023 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7025 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7026 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7028 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7029 MEM <vector(8) int> [(int *)&D.2043] = _39;
7030 MEM <vector(8) int> [(int *)&D.2042] = _38;
7037 should be vectorized as (where _40 is the vectorized rhs
7038 from the D.2042[_21] = 0; store):
7039 _30 = MEM <vector(8) int> [(int *)&D.2043];
7040 _31 = MEM <vector(8) int> [(int *)&D.2042];
7041 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7042 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7044 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7045 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7046 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7048 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7049 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7050 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7052 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7053 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7056 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7057 MEM <vector(8) int> [(int *)&D.2044] = _39;
7058 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7059 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7060 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
7061 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
7064 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
7065 if (units_log2
== -1)
7072 /* Function vectorizable_scan_store.
7074 Helper of vectorizable_store, arguments like on vectorizable_store.
7075 Handle only the transformation, checking is done in check_scan_store. */
7078 vectorizable_scan_store (vec_info
*vinfo
,
7079 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7080 gimple
**vec_stmt
, int ncopies
)
7082 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7083 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7084 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7085 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7087 if (dump_enabled_p ())
7088 dump_printf_loc (MSG_NOTE
, vect_location
,
7089 "transform scan store. ncopies = %d\n", ncopies
);
7091 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7092 tree rhs
= gimple_assign_rhs1 (stmt
);
7093 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
7095 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7096 bool inscan_var_store
7097 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* For exclusive scan on the non-inscan variable, follow the use chain
   of RHS to the combiner statement's lhs (the shape was verified by
   check_scan_store).  */
7099 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7101 use_operand_p use_p
;
7102 imm_use_iterator iter
;
7103 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7105 gimple
*use_stmt
= USE_STMT (use_p
);
7106 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7108 rhs
= gimple_assign_lhs (use_stmt
);
/* Recover the combiner operation and its two load operands; canonicalize
   so that load1/var1 refer to the non-inscan "omp simd array".  */
7113 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7114 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7115 if (code
== POINTER_PLUS_EXPR
)
7117 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7118 && commutative_tree_code (code
));
7119 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7120 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7121 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7122 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7123 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7124 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7125 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7126 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7127 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7128 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7129 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7131 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7133 std::swap (rhs1
, rhs2
);
7134 std::swap (var1
, var2
);
7135 std::swap (load1_dr_info
, load2_dr_info
);
/* The reduction initializer recorded by check_scan_store.  */
7138 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
/* Build the permutation masks for each of the units_log2 scan steps plus
   the final last-element broadcast (see scan_store_can_perm_p).  */
7141 unsigned HOST_WIDE_INT nunits
;
7142 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7144 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7145 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7146 gcc_assert (units_log2
> 0);
7147 auto_vec
<tree
, 16> perms
;
7148 perms
.quick_grow (units_log2
+ 1);
7149 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7150 for (int i
= 0; i
<= units_log2
; ++i
)
7152 unsigned HOST_WIDE_INT j
, k
;
7153 vec_perm_builder
sel (nunits
, nunits
, 1);
7154 sel
.quick_grow (nunits
);
7155 if (i
== units_log2
)
7156 for (j
= 0; j
< nunits
; ++j
)
7157 sel
[j
] = nunits
- 1;
7160 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7162 for (k
= 0; j
< nunits
; ++j
, ++k
)
7163 sel
[j
] = nunits
+ k
;
7165 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7166 if (!use_whole_vector
.is_empty ()
7167 && use_whole_vector
[i
] != scan_store_kind_perm
)
7169 if (zero_vec
== NULL_TREE
)
7170 zero_vec
= build_zero_cst (vectype
);
7171 if (masktype
== NULL_TREE
7172 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7173 masktype
= truth_type_for (vectype
);
7174 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7177 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7180 tree vec_oprnd1
= NULL_TREE
;
7181 tree vec_oprnd2
= NULL_TREE
;
7182 tree vec_oprnd3
= NULL_TREE
;
7183 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7184 tree dataref_offset
= build_int_cst (ref_type
, 0);
7185 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7186 vectype
, VMAT_CONTIGUOUS
);
7187 tree ldataref_ptr
= NULL_TREE
;
7188 tree orig
= NULL_TREE
;
7189 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7190 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7191 auto_vec
<tree
> vec_oprnds1
;
7192 auto_vec
<tree
> vec_oprnds2
;
7193 auto_vec
<tree
> vec_oprnds3
;
7194 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7195 *init
, &vec_oprnds1
,
7196 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7197 rhs2
, &vec_oprnds3
);
/* Emit the vectorized scan sequence once per copy.  */
7198 for (int j
= 0; j
< ncopies
; j
++)
7200 vec_oprnd1
= vec_oprnds1
[j
];
7201 if (ldataref_ptr
== NULL
)
7202 vec_oprnd2
= vec_oprnds2
[j
];
7203 vec_oprnd3
= vec_oprnds3
[j
];
7206 else if (!inscan_var_store
)
7207 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
/* When reading through ldataref_ptr, load the first operand from
   memory instead of using the precomputed defs.  */
7211 vec_oprnd2
= make_ssa_name (vectype
);
7212 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7213 unshare_expr (ldataref_ptr
),
7215 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7216 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7217 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7218 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7219 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* The log2 scan steps: shift-by-2**i permutation (or whole-vector
   shift fallback) followed by the combiner operation.  */
7222 tree v
= vec_oprnd2
;
7223 for (int i
= 0; i
< units_log2
; ++i
)
7225 tree new_temp
= make_ssa_name (vectype
);
7226 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7228 && (use_whole_vector
[i
]
7229 != scan_store_kind_perm
))
7230 ? zero_vec
: vec_oprnd1
, v
,
7232 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7233 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7234 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7236 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7238 /* Whole vector shift shifted in zero bits, but if *init
7239 is not initializer_zerop, we need to replace those elements
7240 with elements from vec_oprnd1. */
7241 tree_vector_builder
vb (masktype
, nunits
, 1);
7242 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7243 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7244 ? boolean_false_node
: boolean_true_node
);
7246 tree new_temp2
= make_ssa_name (vectype
);
7247 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7248 new_temp
, vec_oprnd1
);
7249 vect_finish_stmt_generation (vinfo
, stmt_info
,
7251 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7252 new_temp
= new_temp2
;
7255 /* For exclusive scan, perform the perms[i] permutation once
7258 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7266 tree new_temp2
= make_ssa_name (vectype
);
7267 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7268 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7269 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Combine with the running value carried over from the previous copy
   (ORIG holds the broadcast last element of the prior iteration).  */
7274 tree new_temp
= make_ssa_name (vectype
);
7275 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7276 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7277 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7279 tree last_perm_arg
= new_temp
;
7280 /* For exclusive scan, new_temp computed above is the exclusive scan
7281 prefix sum. Turn it into inclusive prefix sum for the broadcast
7282 of the last element into orig. */
7283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7285 last_perm_arg
= make_ssa_name (vectype
);
7286 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7287 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7288 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Broadcast the last element (perms[units_log2]) into ORIG for the
   next copy / the final inscan-variable store.  */
7291 orig
= make_ssa_name (vectype
);
7292 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7293 last_perm_arg
, perms
[units_log2
]);
7294 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7295 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7297 if (!inscan_var_store
)
7299 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7300 unshare_expr (dataref_ptr
),
7302 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7303 g
= gimple_build_assign (data_ref
, new_temp
);
7304 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7305 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* For the inscan variable the stores of ORIG (the broadcast result)
   are emitted in a separate pass after all copies were combined.  */
7309 if (inscan_var_store
)
7310 for (int j
= 0; j
< ncopies
; j
++)
7313 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7315 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7316 unshare_expr (dataref_ptr
),
7318 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7319 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7320 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7321 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7327 /* Function vectorizable_store.
7329 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7330 that can be vectorized.
7331 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7332 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7333 Return true if STMT_INFO is vectorizable in this way. */
7336 vectorizable_store (vec_info
*vinfo
,
7337 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7338 gimple
**vec_stmt
, slp_tree slp_node
,
7339 stmt_vector_for_cost
*cost_vec
)
7343 tree vec_oprnd
= NULL_TREE
;
7345 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7346 class loop
*loop
= NULL
;
7347 machine_mode vec_mode
;
7349 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7350 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7351 tree dataref_ptr
= NULL_TREE
;
7352 tree dataref_offset
= NULL_TREE
;
7353 gimple
*ptr_incr
= NULL
;
7356 stmt_vec_info first_stmt_info
;
7358 unsigned int group_size
, i
;
7359 vec
<tree
> oprnds
= vNULL
;
7360 vec
<tree
> result_chain
= vNULL
;
7361 vec
<tree
> vec_oprnds
= vNULL
;
7362 bool slp
= (slp_node
!= NULL
);
7363 unsigned int vec_num
;
7364 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7366 gather_scatter_info gs_info
;
7368 vec_load_store_type vls_type
;
7371 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7374 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7378 /* Is vectorizable store? */
7380 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7381 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7383 tree scalar_dest
= gimple_assign_lhs (assign
);
7384 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7385 && is_pattern_stmt_p (stmt_info
))
7386 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7387 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7388 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7389 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7390 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7391 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7392 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7393 && TREE_CODE (scalar_dest
) != MEM_REF
)
7398 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7399 if (!call
|| !gimple_call_internal_p (call
))
7402 internal_fn ifn
= gimple_call_internal_fn (call
);
7403 if (!internal_store_fn_p (ifn
))
7406 if (slp_node
!= NULL
)
7408 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7410 "SLP of masked stores not supported.\n");
7414 int mask_index
= internal_fn_mask_index (ifn
);
7416 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7417 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7421 op
= vect_get_store_rhs (stmt_info
);
7423 /* Cannot have hybrid store SLP -- that would mean storing to the
7424 same location twice. */
7425 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7427 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7428 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7432 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7433 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7438 /* Multiple types in SLP are handled by creating the appropriate number of
7439 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7444 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7446 gcc_assert (ncopies
>= 1);
7448 /* FORNOW. This restriction should be relaxed. */
7449 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7451 if (dump_enabled_p ())
7452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7453 "multiple types in nested loop.\n");
7457 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7458 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7461 elem_type
= TREE_TYPE (vectype
);
7462 vec_mode
= TYPE_MODE (vectype
);
7464 if (!STMT_VINFO_DATA_REF (stmt_info
))
7467 vect_memory_access_type memory_access_type
;
7468 enum dr_alignment_support alignment_support_scheme
;
7471 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7472 ncopies
, &memory_access_type
, &poffset
,
7473 &alignment_support_scheme
, &misalignment
, &gs_info
))
7478 if (memory_access_type
== VMAT_CONTIGUOUS
)
7480 if (!VECTOR_MODE_P (vec_mode
)
7481 || !can_vec_mask_load_store_p (vec_mode
,
7482 TYPE_MODE (mask_vectype
), false))
7485 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7486 && (memory_access_type
!= VMAT_GATHER_SCATTER
7487 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7489 if (dump_enabled_p ())
7490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7491 "unsupported access type for masked store.\n");
7497 /* FORNOW. In some cases can vectorize even if data-type not supported
7498 (e.g. - array initialization with 0). */
7499 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7503 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7504 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7505 && memory_access_type
!= VMAT_GATHER_SCATTER
7506 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7509 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7510 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7511 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7515 first_stmt_info
= stmt_info
;
7516 first_dr_info
= dr_info
;
7517 group_size
= vec_num
= 1;
7520 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7522 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7523 memory_access_type
))
7527 if (!vec_stmt
) /* transformation not required. */
7529 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7532 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7533 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7534 group_size
, memory_access_type
,
7535 ncopies
, &gs_info
, mask
);
7538 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7541 if (dump_enabled_p ())
7542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7543 "incompatible vector types for invariants\n");
7547 if (dump_enabled_p ()
7548 && memory_access_type
!= VMAT_ELEMENTWISE
7549 && memory_access_type
!= VMAT_GATHER_SCATTER
7550 && alignment_support_scheme
!= dr_aligned
)
7551 dump_printf_loc (MSG_NOTE
, vect_location
,
7552 "Vectorizing an unaligned access.\n");
7554 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7555 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7556 memory_access_type
, alignment_support_scheme
,
7557 misalignment
, vls_type
, slp_node
, cost_vec
);
7560 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7564 ensure_base_align (dr_info
);
7566 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7568 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7569 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7570 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7571 tree ptr
, var
, scale
, vec_mask
;
7572 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7573 tree mask_halfvectype
= mask_vectype
;
7574 edge pe
= loop_preheader_edge (loop
);
7577 enum { NARROW
, NONE
, WIDEN
} modifier
;
7578 poly_uint64 scatter_off_nunits
7579 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7581 if (known_eq (nunits
, scatter_off_nunits
))
7583 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7587 /* Currently gathers and scatters are only supported for
7588 fixed-length vectors. */
7589 unsigned int count
= scatter_off_nunits
.to_constant ();
7590 vec_perm_builder
sel (count
, count
, 1);
7591 for (i
= 0; i
< (unsigned int) count
; ++i
)
7592 sel
.quick_push (i
| (count
/ 2));
7594 vec_perm_indices
indices (sel
, 1, count
);
7595 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7597 gcc_assert (perm_mask
!= NULL_TREE
);
7599 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7603 /* Currently gathers and scatters are only supported for
7604 fixed-length vectors. */
7605 unsigned int count
= nunits
.to_constant ();
7606 vec_perm_builder
sel (count
, count
, 1);
7607 for (i
= 0; i
< (unsigned int) count
; ++i
)
7608 sel
.quick_push (i
| (count
/ 2));
7610 vec_perm_indices
indices (sel
, 2, count
);
7611 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7612 gcc_assert (perm_mask
!= NULL_TREE
);
7616 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7621 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7622 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7623 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7624 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7625 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7626 scaletype
= TREE_VALUE (arglist
);
7628 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7629 && TREE_CODE (rettype
) == VOID_TYPE
);
7631 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7632 if (!is_gimple_min_invariant (ptr
))
7634 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7635 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7636 gcc_assert (!new_bb
);
7639 if (mask
== NULL_TREE
)
7641 mask_arg
= build_int_cst (masktype
, -1);
7642 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7643 mask_arg
, masktype
, NULL
);
7646 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7648 auto_vec
<tree
> vec_oprnds0
;
7649 auto_vec
<tree
> vec_oprnds1
;
7650 auto_vec
<tree
> vec_masks
;
7653 tree mask_vectype
= truth_type_for (vectype
);
7654 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7656 ? ncopies
/ 2 : ncopies
,
7657 mask
, &vec_masks
, mask_vectype
);
7659 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7661 ? ncopies
/ 2 : ncopies
,
7662 gs_info
.offset
, &vec_oprnds0
);
7663 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7665 ? ncopies
/ 2 : ncopies
,
7667 for (j
= 0; j
< ncopies
; ++j
)
7669 if (modifier
== WIDEN
)
7672 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7673 perm_mask
, stmt_info
, gsi
);
7675 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7676 src
= vec_oprnd1
= vec_oprnds1
[j
];
7678 mask_op
= vec_mask
= vec_masks
[j
];
7680 else if (modifier
== NARROW
)
7683 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7684 perm_mask
, stmt_info
, gsi
);
7686 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7687 op
= vec_oprnd0
= vec_oprnds0
[j
];
7689 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7693 op
= vec_oprnd0
= vec_oprnds0
[j
];
7694 src
= vec_oprnd1
= vec_oprnds1
[j
];
7696 mask_op
= vec_mask
= vec_masks
[j
];
7699 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7701 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7702 TYPE_VECTOR_SUBPARTS (srctype
)));
7703 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7704 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7706 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7707 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7711 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7713 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7714 TYPE_VECTOR_SUBPARTS (idxtype
)));
7715 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7716 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7718 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7719 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7727 if (modifier
== NARROW
)
7729 var
= vect_get_new_ssa_name (mask_halfvectype
,
7732 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7733 : VEC_UNPACK_LO_EXPR
,
7735 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7738 tree optype
= TREE_TYPE (mask_arg
);
7739 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7742 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7743 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7744 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7746 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7747 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7749 if (!useless_type_conversion_p (masktype
, utype
))
7751 gcc_assert (TYPE_PRECISION (utype
)
7752 <= TYPE_PRECISION (masktype
));
7753 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7754 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7755 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7761 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7762 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7764 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7766 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7769 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7770 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7773 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7778 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7780 /* We vectorize all the stmts of the interleaving group when we
7781 reach the last stmt in the group. */
7782 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7783 < DR_GROUP_SIZE (first_stmt_info
)
7792 grouped_store
= false;
7793 /* VEC_NUM is the number of vect stmts to be created for this
7795 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7796 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7797 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7798 == first_stmt_info
);
7799 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7800 op
= vect_get_store_rhs (first_stmt_info
);
7803 /* VEC_NUM is the number of vect stmts to be created for this
7805 vec_num
= group_size
;
7807 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7810 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7812 if (dump_enabled_p ())
7813 dump_printf_loc (MSG_NOTE
, vect_location
,
7814 "transform store. ncopies = %d\n", ncopies
);
7816 if (memory_access_type
== VMAT_ELEMENTWISE
7817 || memory_access_type
== VMAT_STRIDED_SLP
)
7819 gimple_stmt_iterator incr_gsi
;
7825 tree stride_base
, stride_step
, alias_off
;
7829 /* Checked by get_load_store_type. */
7830 unsigned int const_nunits
= nunits
.to_constant ();
7832 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7833 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7835 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7837 = fold_build_pointer_plus
7838 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7839 size_binop (PLUS_EXPR
,
7840 convert_to_ptrofftype (dr_offset
),
7841 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7842 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7844 /* For a store with loop-invariant (but other than power-of-2)
7845 stride (i.e. not a grouped access) like so:
7847 for (i = 0; i < n; i += stride)
7850 we generate a new induction variable and new stores from
7851 the components of the (vectorized) rhs:
7853 for (j = 0; ; j += VF*stride)
7858 array[j + stride] = tmp2;
7862 unsigned nstores
= const_nunits
;
7864 tree ltype
= elem_type
;
7865 tree lvectype
= vectype
;
7868 if (group_size
< const_nunits
7869 && const_nunits
% group_size
== 0)
7871 nstores
= const_nunits
/ group_size
;
7873 ltype
= build_vector_type (elem_type
, group_size
);
7876 /* First check if vec_extract optab doesn't support extraction
7877 of vector elts directly. */
7878 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7880 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7881 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7882 group_size
).exists (&vmode
)
7883 || (convert_optab_handler (vec_extract_optab
,
7884 TYPE_MODE (vectype
), vmode
)
7885 == CODE_FOR_nothing
))
7887 /* Try to avoid emitting an extract of vector elements
7888 by performing the extracts using an integer type of the
7889 same size, extracting from a vector of those and then
7890 re-interpreting it as the original vector type if
7893 = group_size
* GET_MODE_BITSIZE (elmode
);
7894 unsigned int lnunits
= const_nunits
/ group_size
;
7895 /* If we can't construct such a vector fall back to
7896 element extracts from the original vector type and
7897 element size stores. */
7898 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7899 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7900 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7901 lnunits
).exists (&vmode
)
7902 && (convert_optab_handler (vec_extract_optab
,
7904 != CODE_FOR_nothing
))
7908 ltype
= build_nonstandard_integer_type (lsize
, 1);
7909 lvectype
= build_vector_type (ltype
, nstores
);
7911 /* Else fall back to vector extraction anyway.
7912 Fewer stores are more important than avoiding spilling
7913 of the vector we extract from. Compared to the
7914 construction case in vectorizable_load no store-forwarding
7915 issue exists here for reasonable archs. */
7918 else if (group_size
>= const_nunits
7919 && group_size
% const_nunits
== 0)
7922 lnel
= const_nunits
;
7926 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7927 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7930 ivstep
= stride_step
;
7931 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7932 build_int_cst (TREE_TYPE (ivstep
), vf
));
7934 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7936 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7937 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7938 create_iv (stride_base
, ivstep
, NULL
,
7939 loop
, &incr_gsi
, insert_after
,
7941 incr
= gsi_stmt (incr_gsi
);
7943 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7945 alias_off
= build_int_cst (ref_type
, 0);
7946 stmt_vec_info next_stmt_info
= first_stmt_info
;
7947 for (g
= 0; g
< group_size
; g
++)
7949 running_off
= offvar
;
7952 tree size
= TYPE_SIZE_UNIT (ltype
);
7953 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7955 tree newoff
= copy_ssa_name (running_off
, NULL
);
7956 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7958 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7959 running_off
= newoff
;
7962 op
= vect_get_store_rhs (next_stmt_info
);
7963 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7965 unsigned int group_el
= 0;
7966 unsigned HOST_WIDE_INT
7967 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7968 for (j
= 0; j
< ncopies
; j
++)
7970 vec_oprnd
= vec_oprnds
[j
];
7971 /* Pun the vector to extract from if necessary. */
7972 if (lvectype
!= vectype
)
7974 tree tem
= make_ssa_name (lvectype
);
7976 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7977 lvectype
, vec_oprnd
));
7978 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7981 for (i
= 0; i
< nstores
; i
++)
7983 tree newref
, newoff
;
7984 gimple
*incr
, *assign
;
7985 tree size
= TYPE_SIZE (ltype
);
7986 /* Extract the i'th component. */
7987 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7988 bitsize_int (i
), size
);
7989 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7992 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7996 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7998 newref
= build2 (MEM_REF
, ltype
,
7999 running_off
, this_off
);
8000 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8002 /* And store it to *running_off. */
8003 assign
= gimple_build_assign (newref
, elem
);
8004 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8008 || group_el
== group_size
)
8010 newoff
= copy_ssa_name (running_off
, NULL
);
8011 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8012 running_off
, stride_step
);
8013 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8015 running_off
= newoff
;
8018 if (g
== group_size
- 1
8021 if (j
== 0 && i
== 0)
8023 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8027 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8028 vec_oprnds
.release ();
8036 auto_vec
<tree
> dr_chain (group_size
);
8037 oprnds
.create (group_size
);
8039 gcc_assert (alignment_support_scheme
);
8040 vec_loop_masks
*loop_masks
8041 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8042 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8044 vec_loop_lens
*loop_lens
8045 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8046 ? &LOOP_VINFO_LENS (loop_vinfo
)
8049 /* Shouldn't go with length-based approach if fully masked. */
8050 gcc_assert (!loop_lens
|| !loop_masks
);
8052 /* Targets with store-lane instructions must not require explicit
8053 realignment. vect_supportable_dr_alignment always returns either
8054 dr_aligned or dr_unaligned_supported for masked operations. */
8055 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8058 || alignment_support_scheme
== dr_aligned
8059 || alignment_support_scheme
== dr_unaligned_supported
);
8061 tree offset
= NULL_TREE
;
8062 if (!known_eq (poffset
, 0))
8063 offset
= size_int (poffset
);
8066 tree vec_offset
= NULL_TREE
;
8067 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8069 aggr_type
= NULL_TREE
;
8072 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8074 aggr_type
= elem_type
;
8075 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8076 &bump
, &vec_offset
);
8080 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8081 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8083 aggr_type
= vectype
;
8084 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8085 memory_access_type
);
8089 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8091 /* In case the vectorization factor (VF) is bigger than the number
8092 of elements that we can fit in a vectype (nunits), we have to generate
8093 more than one vector stmt - i.e - we need to "unroll" the
8094 vector stmt by a factor VF/nunits. */
8096 /* In case of interleaving (non-unit grouped access):
8103 We create vectorized stores starting from base address (the access of the
8104 first stmt in the chain (S2 in the above example), when the last store stmt
8105 of the chain (S4) is reached:
8108 VS2: &base + vec_size*1 = vx0
8109 VS3: &base + vec_size*2 = vx1
8110 VS4: &base + vec_size*3 = vx3
8112 Then permutation statements are generated:
8114 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8115 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8118 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8119 (the order of the data-refs in the output of vect_permute_store_chain
8120 corresponds to the order of scalar stmts in the interleaving chain - see
8121 the documentation of vect_permute_store_chain()).
8123 In case of both multiple types and interleaving, above vector stores and
8124 permutation stmts are created for every copy. The result vector stmts are
8125 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8126 STMT_VINFO_RELATED_STMT for the next copies.
8129 auto_vec
<tree
> vec_masks
;
8130 tree vec_mask
= NULL
;
8131 auto_vec
<tree
> vec_offsets
;
8132 auto_vec
<vec
<tree
> > gvec_oprnds
;
8133 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8134 for (j
= 0; j
< ncopies
; j
++)
8141 /* Get vectorized arguments for SLP_NODE. */
8142 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8144 vec_oprnd
= vec_oprnds
[0];
8148 /* For interleaved stores we collect vectorized defs for all the
8149 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8150 used as an input to vect_permute_store_chain().
8152 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8153 and OPRNDS are of size 1. */
8154 stmt_vec_info next_stmt_info
= first_stmt_info
;
8155 for (i
= 0; i
< group_size
; i
++)
8157 /* Since gaps are not supported for interleaved stores,
8158 DR_GROUP_SIZE is the exact number of stmts in the chain.
8159 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8160 that there is no interleaving, DR_GROUP_SIZE is 1,
8161 and only one iteration of the loop will be executed. */
8162 op
= vect_get_store_rhs (next_stmt_info
);
8163 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8164 ncopies
, op
, &gvec_oprnds
[i
]);
8165 vec_oprnd
= gvec_oprnds
[i
][0];
8166 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8167 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8168 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8172 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8173 mask
, &vec_masks
, mask_vectype
);
8174 vec_mask
= vec_masks
[0];
8178 /* We should have catched mismatched types earlier. */
8179 gcc_assert (useless_type_conversion_p (vectype
,
8180 TREE_TYPE (vec_oprnd
)));
8181 bool simd_lane_access_p
8182 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8183 if (simd_lane_access_p
8185 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8186 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8187 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8188 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8189 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8190 get_alias_set (TREE_TYPE (ref_type
))))
8192 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8193 dataref_offset
= build_int_cst (ref_type
, 0);
8195 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8197 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8198 slp_node
, &gs_info
, &dataref_ptr
,
8200 vec_offset
= vec_offsets
[0];
8204 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8205 simd_lane_access_p
? loop
: NULL
,
8206 offset
, &dummy
, gsi
, &ptr_incr
,
8207 simd_lane_access_p
, bump
);
8211 /* For interleaved stores we created vectorized defs for all the
8212 defs stored in OPRNDS in the previous iteration (previous copy).
8213 DR_CHAIN is then used as an input to vect_permute_store_chain().
8214 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8215 OPRNDS are of size 1. */
8216 for (i
= 0; i
< group_size
; i
++)
8218 vec_oprnd
= gvec_oprnds
[i
][j
];
8219 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8220 oprnds
[i
] = gvec_oprnds
[i
][j
];
8223 vec_mask
= vec_masks
[j
];
8226 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8227 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8228 vec_offset
= vec_offsets
[j
];
8230 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8234 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8238 /* Get an array into which we can store the individual vectors. */
8239 vec_array
= create_vector_array (vectype
, vec_num
);
8241 /* Invalidate the current contents of VEC_ARRAY. This should
8242 become an RTL clobber too, which prevents the vector registers
8243 from being upward-exposed. */
8244 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8246 /* Store the individual vectors into the array. */
8247 for (i
= 0; i
< vec_num
; i
++)
8249 vec_oprnd
= dr_chain
[i
];
8250 write_vector_array (vinfo
, stmt_info
,
8251 gsi
, vec_oprnd
, vec_array
, i
);
8254 tree final_mask
= NULL
;
8256 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8259 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8260 final_mask
, vec_mask
, gsi
);
8266 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8268 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8269 tree alias_ptr
= build_int_cst (ref_type
, align
);
8270 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8271 dataref_ptr
, alias_ptr
,
8272 final_mask
, vec_array
);
8277 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8278 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8279 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8281 gimple_call_set_lhs (call
, data_ref
);
8283 gimple_call_set_nothrow (call
, true);
8284 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8287 /* Record that VEC_ARRAY is now dead. */
8288 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8296 result_chain
.create (group_size
);
8298 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8299 gsi
, &result_chain
);
8302 stmt_vec_info next_stmt_info
= first_stmt_info
;
8303 for (i
= 0; i
< vec_num
; i
++)
8306 unsigned HOST_WIDE_INT align
;
8308 tree final_mask
= NULL_TREE
;
8310 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8312 vectype
, vec_num
* j
+ i
);
8314 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
8315 final_mask
, vec_mask
, gsi
);
8317 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8319 tree scale
= size_int (gs_info
.scale
);
8322 call
= gimple_build_call_internal
8323 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8324 scale
, vec_oprnd
, final_mask
);
8326 call
= gimple_build_call_internal
8327 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8329 gimple_call_set_nothrow (call
, true);
8330 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8336 /* Bump the vector pointer. */
8337 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8338 gsi
, stmt_info
, bump
);
8341 vec_oprnd
= vec_oprnds
[i
];
8342 else if (grouped_store
)
8343 /* For grouped stores vectorized defs are interleaved in
8344 vect_permute_store_chain(). */
8345 vec_oprnd
= result_chain
[i
];
8347 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8348 if (alignment_support_scheme
== dr_aligned
)
8350 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8352 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8356 misalign
= misalignment
;
8357 if (dataref_offset
== NULL_TREE
8358 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8359 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8361 align
= least_bit_hwi (misalign
| align
);
8363 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8365 tree perm_mask
= perm_mask_for_reverse (vectype
);
8366 tree perm_dest
= vect_create_destination_var
8367 (vect_get_store_rhs (stmt_info
), vectype
);
8368 tree new_temp
= make_ssa_name (perm_dest
);
8370 /* Generate the permute statement. */
8372 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8373 vec_oprnd
, perm_mask
);
8374 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8376 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8377 vec_oprnd
= new_temp
;
8380 /* Arguments are ready. Create the new vector stmt. */
8383 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8385 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8387 final_mask
, vec_oprnd
);
8388 gimple_call_set_nothrow (call
, true);
8389 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8395 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8396 vec_num
* ncopies
, vec_num
* j
+ i
);
8397 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8398 machine_mode vmode
= TYPE_MODE (vectype
);
8399 opt_machine_mode new_ovmode
8400 = get_len_load_store_mode (vmode
, false);
8401 machine_mode new_vmode
= new_ovmode
.require ();
8402 /* Need conversion if it's wrapped with VnQI. */
8403 if (vmode
!= new_vmode
)
8406 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8409 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8411 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8413 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8415 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8420 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8421 ptr
, final_len
, vec_oprnd
);
8422 gimple_call_set_nothrow (call
, true);
8423 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8428 data_ref
= fold_build2 (MEM_REF
, vectype
,
8432 : build_int_cst (ref_type
, 0));
8433 if (alignment_support_scheme
== dr_aligned
)
8436 TREE_TYPE (data_ref
)
8437 = build_aligned_type (TREE_TYPE (data_ref
),
8438 align
* BITS_PER_UNIT
);
8439 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8440 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8441 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8447 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8448 if (!next_stmt_info
)
8455 *vec_stmt
= new_stmt
;
8456 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8460 for (i
= 0; i
< group_size
; ++i
)
8462 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8466 result_chain
.release ();
8467 vec_oprnds
.release ();
8472 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8473 VECTOR_CST mask. No checks are made that the target platform supports the
8474 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8475 vect_gen_perm_mask_checked. */
8478 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8482 poly_uint64 nunits
= sel
.length ();
8483 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8485 mask_type
= build_vector_type (ssizetype
, nunits
);
8486 return vec_perm_indices_to_tree (mask_type
, sel
);
8489 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8490 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8493 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8495 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8496 return vect_gen_perm_mask_any (vectype
, sel
);
8499 /* Given a vector variable X and Y, that was generated for the scalar
8500 STMT_INFO, generate instructions to permute the vector elements of X and Y
8501 using permutation mask MASK_VEC, insert them at *GSI and return the
8502 permuted vector variable. */
8505 permute_vec_elements (vec_info
*vinfo
,
8506 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8507 gimple_stmt_iterator
*gsi
)
8509 tree vectype
= TREE_TYPE (x
);
8510 tree perm_dest
, data_ref
;
8513 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8514 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8515 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8517 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8518 data_ref
= make_ssa_name (perm_dest
);
8520 /* Generate the permute statement. */
8521 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8522 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8527 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8528 inserting them on the loops preheader edge. Returns true if we
8529 were successful in doing so (and thus STMT_INFO can be moved then),
8530 otherwise returns false. */
8533 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8539 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8541 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8542 if (!gimple_nop_p (def_stmt
)
8543 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8545 /* Make sure we don't need to recurse. While we could do
8546 so in simple cases when there are more complex use webs
8547 we don't have an easy way to preserve stmt order to fulfil
8548 dependencies within them. */
8551 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8553 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8555 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8556 if (!gimple_nop_p (def_stmt2
)
8557 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8567 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8569 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8570 if (!gimple_nop_p (def_stmt
)
8571 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8573 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8574 gsi_remove (&gsi
, false);
8575 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8582 /* vectorizable_load.
8584 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8585 that can be vectorized.
8586 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8587 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8588 Return true if STMT_INFO is vectorizable in this way. */
8591 vectorizable_load (vec_info
*vinfo
,
8592 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8593 gimple
**vec_stmt
, slp_tree slp_node
,
8594 stmt_vector_for_cost
*cost_vec
)
8597 tree vec_dest
= NULL
;
8598 tree data_ref
= NULL
;
8599 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8600 class loop
*loop
= NULL
;
8601 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8602 bool nested_in_vect_loop
= false;
8607 tree dataref_ptr
= NULL_TREE
;
8608 tree dataref_offset
= NULL_TREE
;
8609 gimple
*ptr_incr
= NULL
;
8612 unsigned int group_size
;
8613 poly_uint64 group_gap_adj
;
8614 tree msq
= NULL_TREE
, lsq
;
8615 tree realignment_token
= NULL_TREE
;
8617 vec
<tree
> dr_chain
= vNULL
;
8618 bool grouped_load
= false;
8619 stmt_vec_info first_stmt_info
;
8620 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8621 bool compute_in_loop
= false;
8622 class loop
*at_loop
;
8624 bool slp
= (slp_node
!= NULL
);
8625 bool slp_perm
= false;
8626 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8629 gather_scatter_info gs_info
;
8631 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8633 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8636 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8640 if (!STMT_VINFO_DATA_REF (stmt_info
))
8643 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8644 int mask_index
= -1;
8645 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8647 scalar_dest
= gimple_assign_lhs (assign
);
8648 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8651 tree_code code
= gimple_assign_rhs_code (assign
);
8652 if (code
!= ARRAY_REF
8653 && code
!= BIT_FIELD_REF
8654 && code
!= INDIRECT_REF
8655 && code
!= COMPONENT_REF
8656 && code
!= IMAGPART_EXPR
8657 && code
!= REALPART_EXPR
8659 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8664 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8665 if (!call
|| !gimple_call_internal_p (call
))
8668 internal_fn ifn
= gimple_call_internal_fn (call
);
8669 if (!internal_load_fn_p (ifn
))
8672 scalar_dest
= gimple_call_lhs (call
);
8676 mask_index
= internal_fn_mask_index (ifn
);
8677 /* ??? For SLP the mask operand is always last. */
8678 if (mask_index
>= 0 && slp_node
)
8679 mask_index
= SLP_TREE_CHILDREN (slp_node
).length () - 1;
8681 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8682 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8686 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8687 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8691 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8692 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8693 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8698 /* Multiple types in SLP are handled by creating the appropriate number of
8699 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8704 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8706 gcc_assert (ncopies
>= 1);
8708 /* FORNOW. This restriction should be relaxed. */
8709 if (nested_in_vect_loop
&& ncopies
> 1)
8711 if (dump_enabled_p ())
8712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8713 "multiple types in nested loop.\n");
8717 /* Invalidate assumptions made by dependence analysis when vectorization
8718 on the unrolled body effectively re-orders stmts. */
8720 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8721 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8722 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8724 if (dump_enabled_p ())
8725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8726 "cannot perform implicit CSE when unrolling "
8727 "with negative dependence distance\n");
8731 elem_type
= TREE_TYPE (vectype
);
8732 mode
= TYPE_MODE (vectype
);
8734 /* FORNOW. In some cases can vectorize even if data-type not supported
8735 (e.g. - data copies). */
8736 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8738 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8740 "Aligned load, but unsupported type.\n");
8744 /* Check if the load is a part of an interleaving chain. */
8745 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8747 grouped_load
= true;
8749 gcc_assert (!nested_in_vect_loop
);
8750 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8752 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8753 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8755 /* Refuse non-SLP vectorization of SLP-only groups. */
8756 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8758 if (dump_enabled_p ())
8759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8760 "cannot vectorize load in non-SLP mode.\n");
8764 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8770 /* In BB vectorization we may not actually use a loaded vector
8771 accessing elements in excess of DR_GROUP_SIZE. */
8772 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8773 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8774 unsigned HOST_WIDE_INT nunits
;
8775 unsigned j
, k
, maxk
= 0;
8776 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8779 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8780 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8781 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8783 if (dump_enabled_p ())
8784 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8785 "BB vectorization with gaps at the end of "
8786 "a load is not supported\n");
8793 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8796 if (dump_enabled_p ())
8797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8799 "unsupported load permutation\n");
8804 /* Invalidate assumptions made by dependence analysis when vectorization
8805 on the unrolled body effectively re-orders stmts. */
8806 if (!PURE_SLP_STMT (stmt_info
)
8807 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8808 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8809 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8811 if (dump_enabled_p ())
8812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8813 "cannot perform implicit CSE when performing "
8814 "group loads with negative dependence distance\n");
8821 vect_memory_access_type memory_access_type
;
8822 enum dr_alignment_support alignment_support_scheme
;
8825 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8826 ncopies
, &memory_access_type
, &poffset
,
8827 &alignment_support_scheme
, &misalignment
, &gs_info
))
8832 if (memory_access_type
== VMAT_CONTIGUOUS
)
8834 machine_mode vec_mode
= TYPE_MODE (vectype
);
8835 if (!VECTOR_MODE_P (vec_mode
)
8836 || !can_vec_mask_load_store_p (vec_mode
,
8837 TYPE_MODE (mask_vectype
), true))
8840 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8841 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8843 if (dump_enabled_p ())
8844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8845 "unsupported access type for masked load.\n");
8848 else if (memory_access_type
== VMAT_GATHER_SCATTER
8849 && gs_info
.ifn
== IFN_LAST
8852 if (dump_enabled_p ())
8853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8854 "unsupported masked emulated gather.\n");
8859 if (!vec_stmt
) /* transformation not required. */
8863 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8866 if (dump_enabled_p ())
8867 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8868 "incompatible vector types for invariants\n");
8873 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8876 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8877 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8878 group_size
, memory_access_type
,
8879 ncopies
, &gs_info
, mask
);
8881 if (dump_enabled_p ()
8882 && memory_access_type
!= VMAT_ELEMENTWISE
8883 && memory_access_type
!= VMAT_GATHER_SCATTER
8884 && alignment_support_scheme
!= dr_aligned
)
8885 dump_printf_loc (MSG_NOTE
, vect_location
,
8886 "Vectorizing an unaligned access.\n");
8888 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8889 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8890 alignment_support_scheme
, misalignment
,
8891 &gs_info
, slp_node
, cost_vec
);
8896 gcc_assert (memory_access_type
8897 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8899 if (dump_enabled_p ())
8900 dump_printf_loc (MSG_NOTE
, vect_location
,
8901 "transform load. ncopies = %d\n", ncopies
);
8905 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8906 ensure_base_align (dr_info
);
8908 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8910 vect_build_gather_load_calls (vinfo
,
8911 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8915 if (memory_access_type
== VMAT_INVARIANT
)
8917 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8918 /* If we have versioned for aliasing or the loop doesn't
8919 have any data dependencies that would preclude this,
8920 then we are sure this is a loop invariant load and
8921 thus we can insert it on the preheader edge. */
8922 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8923 && !nested_in_vect_loop
8924 && hoist_defs_of_uses (stmt_info
, loop
));
8927 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8928 if (dump_enabled_p ())
8929 dump_printf_loc (MSG_NOTE
, vect_location
,
8930 "hoisting out of the vectorized loop: %G", stmt
);
8931 scalar_dest
= copy_ssa_name (scalar_dest
);
8932 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8933 gsi_insert_on_edge_immediate
8934 (loop_preheader_edge (loop
),
8935 gimple_build_assign (scalar_dest
, rhs
));
8937 /* These copies are all equivalent, but currently the representation
8938 requires a separate STMT_VINFO_VEC_STMT for each one. */
8939 gimple_stmt_iterator gsi2
= *gsi
;
8941 for (j
= 0; j
< ncopies
; j
++)
8944 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8947 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8949 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8951 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8955 *vec_stmt
= new_stmt
;
8956 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8962 if (memory_access_type
== VMAT_ELEMENTWISE
8963 || memory_access_type
== VMAT_STRIDED_SLP
)
8965 gimple_stmt_iterator incr_gsi
;
8970 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8971 tree stride_base
, stride_step
, alias_off
;
8972 /* Checked by get_load_store_type. */
8973 unsigned int const_nunits
= nunits
.to_constant ();
8974 unsigned HOST_WIDE_INT cst_offset
= 0;
8977 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8978 gcc_assert (!nested_in_vect_loop
);
8982 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8983 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8987 first_stmt_info
= stmt_info
;
8988 first_dr_info
= dr_info
;
8990 if (slp
&& grouped_load
)
8992 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8993 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8999 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
9000 * vect_get_place_in_interleaving_chain (stmt_info
,
9003 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
9006 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
9008 = fold_build_pointer_plus
9009 (DR_BASE_ADDRESS (first_dr_info
->dr
),
9010 size_binop (PLUS_EXPR
,
9011 convert_to_ptrofftype (dr_offset
),
9012 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
9013 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
9015 /* For a load with loop-invariant (but other than power-of-2)
9016 stride (i.e. not a grouped access) like so:
9018 for (i = 0; i < n; i += stride)
9021 we generate a new induction variable and new accesses to
9022 form a new vector (or vectors, depending on ncopies):
9024 for (j = 0; ; j += VF*stride)
9026 tmp2 = array[j + stride];
9028 vectemp = {tmp1, tmp2, ...}
9031 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
9032 build_int_cst (TREE_TYPE (stride_step
), vf
));
9034 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
9036 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
9037 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
9038 create_iv (stride_base
, ivstep
, NULL
,
9039 loop
, &incr_gsi
, insert_after
,
9042 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
9044 running_off
= offvar
;
9045 alias_off
= build_int_cst (ref_type
, 0);
9046 int nloads
= const_nunits
;
9048 tree ltype
= TREE_TYPE (vectype
);
9049 tree lvectype
= vectype
;
9050 auto_vec
<tree
> dr_chain
;
9051 if (memory_access_type
== VMAT_STRIDED_SLP
)
9053 if (group_size
< const_nunits
)
9055 /* First check if vec_init optab supports construction from vector
9056 elts directly. Otherwise avoid emitting a constructor of
9057 vector elements by performing the loads using an integer type
9058 of the same size, constructing a vector of those and then
9059 re-interpreting it as the original vector type. This avoids a
9060 huge runtime penalty due to the general inability to perform
9061 store forwarding from smaller stores to a larger load. */
9064 = vector_vector_composition_type (vectype
,
9065 const_nunits
/ group_size
,
9067 if (vtype
!= NULL_TREE
)
9069 nloads
= const_nunits
/ group_size
;
9078 lnel
= const_nunits
;
9081 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
9083 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9084 else if (nloads
== 1)
9089 /* For SLP permutation support we need to load the whole group,
9090 not only the number of vector stmts the permutation result
9094 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9096 unsigned int const_vf
= vf
.to_constant ();
9097 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9098 dr_chain
.create (ncopies
);
9101 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9103 unsigned int group_el
= 0;
9104 unsigned HOST_WIDE_INT
9105 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9106 for (j
= 0; j
< ncopies
; j
++)
9109 vec_alloc (v
, nloads
);
9110 gimple
*new_stmt
= NULL
;
9111 for (i
= 0; i
< nloads
; i
++)
9113 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9114 group_el
* elsz
+ cst_offset
);
9115 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9116 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9117 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9118 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9120 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9121 gimple_assign_lhs (new_stmt
));
9125 || group_el
== group_size
)
9127 tree newoff
= copy_ssa_name (running_off
);
9128 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9129 running_off
, stride_step
);
9130 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9132 running_off
= newoff
;
9138 tree vec_inv
= build_constructor (lvectype
, v
);
9139 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9140 vec_inv
, lvectype
, gsi
);
9141 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9142 if (lvectype
!= vectype
)
9144 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9146 build1 (VIEW_CONVERT_EXPR
,
9147 vectype
, new_temp
));
9148 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9155 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9157 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9162 *vec_stmt
= new_stmt
;
9163 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9169 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9175 if (memory_access_type
== VMAT_GATHER_SCATTER
9176 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9177 grouped_load
= false;
9181 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9182 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9183 /* For SLP vectorization we directly vectorize a subchain
9184 without permutation. */
9185 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9186 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9187 /* For BB vectorization always use the first stmt to base
9188 the data ref pointer on. */
9190 first_stmt_info_for_drptr
9191 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9193 /* Check if the chain of loads is already vectorized. */
9194 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9195 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9196 ??? But we can only do so if there is exactly one
9197 as we have no way to get at the rest. Leave the CSE
9199 ??? With the group load eventually participating
9200 in multiple different permutations (having multiple
9201 slp nodes which refer to the same group) the CSE
9202 is even wrong code. See PR56270. */
9205 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9208 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9211 /* VEC_NUM is the number of vect stmts to be created for this group. */
9214 grouped_load
= false;
9215 /* If an SLP permutation is from N elements to N elements,
9216 and if one vector holds a whole number of N, we can load
9217 the inputs to the permutation in the same way as an
9218 unpermuted sequence. In other cases we need to load the
9219 whole group, not only the number of vector stmts the
9220 permutation result fits in. */
9221 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9223 && (group_size
!= scalar_lanes
9224 || !multiple_p (nunits
, group_size
)))
9226 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9227 variable VF; see vect_transform_slp_perm_load. */
9228 unsigned int const_vf
= vf
.to_constant ();
9229 unsigned int const_nunits
= nunits
.to_constant ();
9230 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9231 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9235 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9237 = group_size
- scalar_lanes
;
9241 vec_num
= group_size
;
9243 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9247 first_stmt_info
= stmt_info
;
9248 first_dr_info
= dr_info
;
9249 group_size
= vec_num
= 1;
9251 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9253 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9256 gcc_assert (alignment_support_scheme
);
9257 vec_loop_masks
*loop_masks
9258 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9259 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9261 vec_loop_lens
*loop_lens
9262 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9263 ? &LOOP_VINFO_LENS (loop_vinfo
)
9266 /* Shouldn't go with length-based approach if fully masked. */
9267 gcc_assert (!loop_lens
|| !loop_masks
);
9269 /* Targets with store-lane instructions must not require explicit
9270 realignment. vect_supportable_dr_alignment always returns either
9271 dr_aligned or dr_unaligned_supported for masked operations. */
9272 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9275 || alignment_support_scheme
== dr_aligned
9276 || alignment_support_scheme
== dr_unaligned_supported
);
9278 /* In case the vectorization factor (VF) is bigger than the number
9279 of elements that we can fit in a vectype (nunits), we have to generate
9280 more than one vector stmt - i.e - we need to "unroll" the
9281 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9282 from one copy of the vector stmt to the next, in the field
9283 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9284 stages to find the correct vector defs to be used when vectorizing
9285 stmts that use the defs of the current stmt. The example below
9286 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9287 need to create 4 vectorized stmts):
9289 before vectorization:
9290 RELATED_STMT VEC_STMT
9294 step 1: vectorize stmt S1:
9295 We first create the vector stmt VS1_0, and, as usual, record a
9296 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9297 Next, we create the vector stmt VS1_1, and record a pointer to
9298 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9299 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9301 RELATED_STMT VEC_STMT
9302 VS1_0: vx0 = memref0 VS1_1 -
9303 VS1_1: vx1 = memref1 VS1_2 -
9304 VS1_2: vx2 = memref2 VS1_3 -
9305 VS1_3: vx3 = memref3 - -
9306 S1: x = load - VS1_0
9310 /* In case of interleaving (non-unit grouped access):
9317 Vectorized loads are created in the order of memory accesses
9318 starting from the access of the first stmt of the chain:
9321 VS2: vx1 = &base + vec_size*1
9322 VS3: vx3 = &base + vec_size*2
9323 VS4: vx4 = &base + vec_size*3
9325 Then permutation statements are generated:
9327 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9328 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9331 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9332 (the order of the data-refs in the output of vect_permute_load_chain
9333 corresponds to the order of scalar stmts in the interleaving chain - see
9334 the documentation of vect_permute_load_chain()).
9335 The generation of permutation stmts and recording them in
9336 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9338 In case of both multiple types and interleaving, the vector loads and
9339 permutation stmts above are created for every copy. The result vector
9340 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9341 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9343 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9344 on a target that supports unaligned accesses (dr_unaligned_supported)
9345 we generate the following code:
9349 p = p + indx * vectype_size;
9354 Otherwise, the data reference is potentially unaligned on a target that
9355 does not support unaligned accesses (dr_explicit_realign_optimized) -
9356 then generate the following code, in which the data in each iteration is
9357 obtained by two vector loads, one from the previous iteration, and one
9358 from the current iteration:
9360 msq_init = *(floor(p1))
9361 p2 = initial_addr + VS - 1;
9362 realignment_token = call target_builtin;
9365 p2 = p2 + indx * vectype_size
9367 vec_dest = realign_load (msq, lsq, realignment_token)
9372 /* If the misalignment remains the same throughout the execution of the
9373 loop, we can create the init_addr and permutation mask at the loop
9374 preheader. Otherwise, it needs to be created inside the loop.
9375 This can only occur when vectorizing memory accesses in the inner-loop
9376 nested within an outer-loop that is being vectorized. */
9378 if (nested_in_vect_loop
9379 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9380 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9382 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9383 compute_in_loop
= true;
9386 bool diff_first_stmt_info
9387 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9389 tree offset
= NULL_TREE
;
9390 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9391 || alignment_support_scheme
== dr_explicit_realign
)
9392 && !compute_in_loop
)
9394 /* If we have different first_stmt_info, we can't set up realignment
9395 here, since we can't guarantee first_stmt_info DR has been
9396 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9397 distance from first_stmt_info DR instead as below. */
9398 if (!diff_first_stmt_info
)
9399 msq
= vect_setup_realignment (vinfo
,
9400 first_stmt_info
, gsi
, &realignment_token
,
9401 alignment_support_scheme
, NULL_TREE
,
9403 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9405 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9406 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9408 gcc_assert (!first_stmt_info_for_drptr
);
9414 if (!known_eq (poffset
, 0))
9416 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9417 : size_int (poffset
));
9420 tree vec_offset
= NULL_TREE
;
9421 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9423 aggr_type
= NULL_TREE
;
9426 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9428 aggr_type
= elem_type
;
9429 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9430 &bump
, &vec_offset
);
9434 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9435 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9437 aggr_type
= vectype
;
9438 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9439 memory_access_type
);
9442 vec
<tree
> vec_offsets
= vNULL
;
9443 auto_vec
<tree
> vec_masks
;
9447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
9450 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
9451 &vec_masks
, mask_vectype
);
9453 tree vec_mask
= NULL_TREE
;
9454 poly_uint64 group_elt
= 0;
9455 for (j
= 0; j
< ncopies
; j
++)
9457 /* 1. Create the vector or array pointer update chain. */
9460 bool simd_lane_access_p
9461 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9462 if (simd_lane_access_p
9463 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9464 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9465 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9466 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9467 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9468 get_alias_set (TREE_TYPE (ref_type
)))
9469 && (alignment_support_scheme
== dr_aligned
9470 || alignment_support_scheme
== dr_unaligned_supported
))
9472 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9473 dataref_offset
= build_int_cst (ref_type
, 0);
9475 else if (diff_first_stmt_info
)
9478 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9479 aggr_type
, at_loop
, offset
, &dummy
,
9480 gsi
, &ptr_incr
, simd_lane_access_p
,
9482 /* Adjust the pointer by the difference to first_stmt. */
9483 data_reference_p ptrdr
9484 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9486 = fold_convert (sizetype
,
9487 size_binop (MINUS_EXPR
,
9488 DR_INIT (first_dr_info
->dr
),
9490 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9492 if (alignment_support_scheme
== dr_explicit_realign
)
9494 msq
= vect_setup_realignment (vinfo
,
9495 first_stmt_info_for_drptr
, gsi
,
9497 alignment_support_scheme
,
9498 dataref_ptr
, &at_loop
);
9499 gcc_assert (!compute_in_loop
);
9502 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9504 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9505 slp_node
, &gs_info
, &dataref_ptr
,
9510 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9512 offset
, &dummy
, gsi
, &ptr_incr
,
9513 simd_lane_access_p
, bump
);
9515 vec_mask
= vec_masks
[0];
9520 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9522 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9523 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9526 vec_mask
= vec_masks
[j
];
9529 if (grouped_load
|| slp_perm
)
9530 dr_chain
.create (vec_num
);
9532 gimple
*new_stmt
= NULL
;
9533 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9537 vec_array
= create_vector_array (vectype
, vec_num
);
9539 tree final_mask
= NULL_TREE
;
9541 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9544 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9545 final_mask
, vec_mask
, gsi
);
9551 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9553 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9554 tree alias_ptr
= build_int_cst (ref_type
, align
);
9555 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9556 dataref_ptr
, alias_ptr
,
9562 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9563 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9564 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9566 gimple_call_set_lhs (call
, vec_array
);
9567 gimple_call_set_nothrow (call
, true);
9568 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9571 /* Extract each vector into an SSA_NAME. */
9572 for (i
= 0; i
< vec_num
; i
++)
9574 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9576 dr_chain
.quick_push (new_temp
);
9579 /* Record the mapping between SSA_NAMEs and statements. */
9580 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9582 /* Record that VEC_ARRAY is now dead. */
9583 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9587 for (i
= 0; i
< vec_num
; i
++)
9589 tree final_mask
= NULL_TREE
;
9591 && memory_access_type
!= VMAT_INVARIANT
)
9592 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9594 vectype
, vec_num
* j
+ i
);
9596 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9597 final_mask
, vec_mask
, gsi
);
9599 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9600 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9601 gsi
, stmt_info
, bump
);
9603 /* 2. Create the vector-load in the loop. */
9604 switch (alignment_support_scheme
)
9607 case dr_unaligned_supported
:
9609 unsigned int misalign
;
9610 unsigned HOST_WIDE_INT align
;
9612 if (memory_access_type
== VMAT_GATHER_SCATTER
9613 && gs_info
.ifn
!= IFN_LAST
)
9615 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9616 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9617 tree zero
= build_zero_cst (vectype
);
9618 tree scale
= size_int (gs_info
.scale
);
9621 call
= gimple_build_call_internal
9622 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9623 vec_offset
, scale
, zero
, final_mask
);
9625 call
= gimple_build_call_internal
9626 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9627 vec_offset
, scale
, zero
);
9628 gimple_call_set_nothrow (call
, true);
9630 data_ref
= NULL_TREE
;
9633 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9635 /* Emulated gather-scatter. */
9636 gcc_assert (!final_mask
);
9637 unsigned HOST_WIDE_INT const_nunits
9638 = nunits
.to_constant ();
9639 unsigned HOST_WIDE_INT const_offset_nunits
9640 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9642 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9643 vec_alloc (ctor_elts
, const_nunits
);
9644 gimple_seq stmts
= NULL
;
9645 /* We support offset vectors with more elements
9646 than the data vector for now. */
9647 unsigned HOST_WIDE_INT factor
9648 = const_offset_nunits
/ const_nunits
;
9649 vec_offset
= vec_offsets
[j
/ factor
];
9650 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9651 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9652 tree scale
= size_int (gs_info
.scale
);
9654 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9655 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9657 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9659 tree boff
= size_binop (MULT_EXPR
,
9660 TYPE_SIZE (idx_type
),
9663 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9664 idx_type
, vec_offset
,
9665 TYPE_SIZE (idx_type
),
9667 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9668 idx
= gimple_build (&stmts
, MULT_EXPR
,
9669 sizetype
, idx
, scale
);
9670 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9671 TREE_TYPE (dataref_ptr
),
9673 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9674 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9675 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9676 build_int_cst (ref_type
, 0));
9677 new_stmt
= gimple_build_assign (elt
, ref
);
9678 gimple_seq_add_stmt (&stmts
, new_stmt
);
9679 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9681 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9682 new_stmt
= gimple_build_assign (NULL_TREE
,
9684 (vectype
, ctor_elts
));
9685 data_ref
= NULL_TREE
;
9690 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9691 if (alignment_support_scheme
== dr_aligned
)
9693 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9695 align
= dr_alignment
9696 (vect_dr_behavior (vinfo
, first_dr_info
));
9700 misalign
= misalignment
;
9701 if (dataref_offset
== NULL_TREE
9702 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9703 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9705 align
= least_bit_hwi (misalign
| align
);
9709 tree ptr
= build_int_cst (ref_type
,
9710 align
* BITS_PER_UNIT
);
9712 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9715 gimple_call_set_nothrow (call
, true);
9717 data_ref
= NULL_TREE
;
9719 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9722 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9725 tree ptr
= build_int_cst (ref_type
,
9726 align
* BITS_PER_UNIT
);
9728 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9731 gimple_call_set_nothrow (call
, true);
9733 data_ref
= NULL_TREE
;
9735 /* Need conversion if it's wrapped with VnQI. */
9736 machine_mode vmode
= TYPE_MODE (vectype
);
9737 opt_machine_mode new_ovmode
9738 = get_len_load_store_mode (vmode
, true);
9739 machine_mode new_vmode
= new_ovmode
.require ();
9740 if (vmode
!= new_vmode
)
9742 tree qi_type
= unsigned_intQI_type_node
;
9744 = build_vector_type_for_mode (qi_type
, new_vmode
);
9745 tree var
= vect_get_new_ssa_name (new_vtype
,
9747 gimple_set_lhs (call
, var
);
9748 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9750 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9752 = gimple_build_assign (vec_dest
,
9753 VIEW_CONVERT_EXPR
, op
);
9758 tree ltype
= vectype
;
9759 tree new_vtype
= NULL_TREE
;
9760 unsigned HOST_WIDE_INT gap
9761 = DR_GROUP_GAP (first_stmt_info
);
9762 unsigned int vect_align
9763 = vect_known_alignment_in_bytes (first_dr_info
,
9765 unsigned int scalar_dr_size
9766 = vect_get_scalar_dr_size (first_dr_info
);
9767 /* If there's no peeling for gaps but we have a gap
9768 with slp loads then load the lower half of the
9769 vector only. See get_group_load_store_type for
9770 when we apply this optimization. */
9773 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9775 && known_eq (nunits
, (group_size
- gap
) * 2)
9776 && known_eq (nunits
, group_size
)
9777 && gap
>= (vect_align
/ scalar_dr_size
))
9781 = vector_vector_composition_type (vectype
, 2,
9783 if (new_vtype
!= NULL_TREE
)
9787 = (dataref_offset
? dataref_offset
9788 : build_int_cst (ref_type
, 0));
9789 if (ltype
!= vectype
9790 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9792 unsigned HOST_WIDE_INT gap_offset
9793 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9794 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9795 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9798 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9799 if (alignment_support_scheme
== dr_aligned
)
9802 TREE_TYPE (data_ref
)
9803 = build_aligned_type (TREE_TYPE (data_ref
),
9804 align
* BITS_PER_UNIT
);
9805 if (ltype
!= vectype
)
9807 vect_copy_ref_info (data_ref
,
9808 DR_REF (first_dr_info
->dr
));
9809 tree tem
= make_ssa_name (ltype
);
9810 new_stmt
= gimple_build_assign (tem
, data_ref
);
9811 vect_finish_stmt_generation (vinfo
, stmt_info
,
9814 vec
<constructor_elt
, va_gc
> *v
;
9816 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9818 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9819 build_zero_cst (ltype
));
9820 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9824 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9825 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9826 build_zero_cst (ltype
));
9828 gcc_assert (new_vtype
!= NULL_TREE
);
9829 if (new_vtype
== vectype
)
9830 new_stmt
= gimple_build_assign (
9831 vec_dest
, build_constructor (vectype
, v
));
9834 tree new_vname
= make_ssa_name (new_vtype
);
9835 new_stmt
= gimple_build_assign (
9836 new_vname
, build_constructor (new_vtype
, v
));
9837 vect_finish_stmt_generation (vinfo
, stmt_info
,
9839 new_stmt
= gimple_build_assign (
9840 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9847 case dr_explicit_realign
:
9851 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9853 if (compute_in_loop
)
9854 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9856 dr_explicit_realign
,
9859 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9860 ptr
= copy_ssa_name (dataref_ptr
);
9862 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9863 // For explicit realign the target alignment should be
9864 // known at compile time.
9865 unsigned HOST_WIDE_INT align
=
9866 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9867 new_stmt
= gimple_build_assign
9868 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9870 (TREE_TYPE (dataref_ptr
),
9871 -(HOST_WIDE_INT
) align
));
9872 vect_finish_stmt_generation (vinfo
, stmt_info
,
9875 = build2 (MEM_REF
, vectype
, ptr
,
9876 build_int_cst (ref_type
, 0));
9877 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9878 vec_dest
= vect_create_destination_var (scalar_dest
,
9880 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9881 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9882 gimple_assign_set_lhs (new_stmt
, new_temp
);
9883 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9884 vect_finish_stmt_generation (vinfo
, stmt_info
,
9888 bump
= size_binop (MULT_EXPR
, vs
,
9889 TYPE_SIZE_UNIT (elem_type
));
9890 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9891 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9893 new_stmt
= gimple_build_assign
9894 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9896 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9897 ptr
= copy_ssa_name (ptr
, new_stmt
);
9898 gimple_assign_set_lhs (new_stmt
, ptr
);
9899 vect_finish_stmt_generation (vinfo
, stmt_info
,
9902 = build2 (MEM_REF
, vectype
, ptr
,
9903 build_int_cst (ref_type
, 0));
9906 case dr_explicit_realign_optimized
:
9908 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9909 new_temp
= copy_ssa_name (dataref_ptr
);
9911 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9912 // We should only be doing this if we know the target
9913 // alignment at compile time.
9914 unsigned HOST_WIDE_INT align
=
9915 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9916 new_stmt
= gimple_build_assign
9917 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9918 build_int_cst (TREE_TYPE (dataref_ptr
),
9919 -(HOST_WIDE_INT
) align
));
9920 vect_finish_stmt_generation (vinfo
, stmt_info
,
9923 = build2 (MEM_REF
, vectype
, new_temp
,
9924 build_int_cst (ref_type
, 0));
9930 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9931 /* DATA_REF is null if we've already built the statement. */
9934 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9935 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9937 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9938 gimple_set_lhs (new_stmt
, new_temp
);
9939 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9941 /* 3. Handle explicit realignment if necessary/supported.
9943 vec_dest = realign_load (msq, lsq, realignment_token) */
9944 if (alignment_support_scheme
== dr_explicit_realign_optimized
9945 || alignment_support_scheme
== dr_explicit_realign
)
9947 lsq
= gimple_assign_lhs (new_stmt
);
9948 if (!realignment_token
)
9949 realignment_token
= dataref_ptr
;
9950 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9951 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9952 msq
, lsq
, realignment_token
);
9953 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9954 gimple_assign_set_lhs (new_stmt
, new_temp
);
9955 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9957 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9960 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9961 add_phi_arg (phi
, lsq
,
9962 loop_latch_edge (containing_loop
),
9968 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9970 tree perm_mask
= perm_mask_for_reverse (vectype
);
9971 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9972 perm_mask
, stmt_info
, gsi
);
9973 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9976 /* Collect vector loads and later create their permutation in
9977 vect_transform_grouped_load (). */
9978 if (grouped_load
|| slp_perm
)
9979 dr_chain
.quick_push (new_temp
);
9981 /* Store vector loads in the corresponding SLP_NODE. */
9982 if (slp
&& !slp_perm
)
9983 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9985 /* With SLP permutation we load the gaps as well, without
9986 we need to skip the gaps after we manage to fully load
9987 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9988 group_elt
+= nunits
;
9989 if (maybe_ne (group_gap_adj
, 0U)
9991 && known_eq (group_elt
, group_size
- group_gap_adj
))
9993 poly_wide_int bump_val
9994 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9996 if (tree_int_cst_sgn
9997 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9998 bump_val
= -bump_val
;
9999 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10000 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10001 gsi
, stmt_info
, bump
);
10005 /* Bump the vector pointer to account for a gap or for excess
10006 elements loaded for a permuted SLP load. */
10007 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
10009 poly_wide_int bump_val
10010 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
10012 if (tree_int_cst_sgn
10013 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
10014 bump_val
= -bump_val
;
10015 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
10016 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10021 if (slp
&& !slp_perm
)
10027 /* For SLP we know we've seen all possible uses of dr_chain so
10028 direct vect_transform_slp_perm_load to DCE the unused parts.
10029 ??? This is a hack to prevent compile-time issues as seen
10030 in PR101120 and friends. */
10031 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
10032 gsi
, vf
, false, &n_perms
,
10040 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
10041 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
10043 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10047 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10050 dr_chain
.release ();
10053 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10058 /* Function vect_is_simple_cond.
10061 LOOP - the loop that is being vectorized.
10062 COND - Condition that is checked for simple use.
10065 *COMP_VECTYPE - the vector type for the comparison.
10066 *DTS - The def types for the arguments of the comparison
10068 Returns whether a COND can be vectorized. Checks whether
10069 condition operands are supportable using vec_is_simple_use. */
10072 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
10073 slp_tree slp_node
, tree
*comp_vectype
,
10074 enum vect_def_type
*dts
, tree vectype
)
10077 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10081 if (TREE_CODE (cond
) == SSA_NAME
10082 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
10084 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
10085 &slp_op
, &dts
[0], comp_vectype
)
10087 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
10092 if (!COMPARISON_CLASS_P (cond
))
10095 lhs
= TREE_OPERAND (cond
, 0);
10096 rhs
= TREE_OPERAND (cond
, 1);
10098 if (TREE_CODE (lhs
) == SSA_NAME
)
10100 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10101 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10104 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10105 || TREE_CODE (lhs
) == FIXED_CST
)
10106 dts
[0] = vect_constant_def
;
10110 if (TREE_CODE (rhs
) == SSA_NAME
)
10112 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10113 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10116 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10117 || TREE_CODE (rhs
) == FIXED_CST
)
10118 dts
[1] = vect_constant_def
;
10122 if (vectype1
&& vectype2
10123 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10124 TYPE_VECTOR_SUBPARTS (vectype2
)))
10127 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10128 /* Invariant comparison. */
10129 if (! *comp_vectype
)
10131 tree scalar_type
= TREE_TYPE (lhs
);
10132 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10133 *comp_vectype
= truth_type_for (vectype
);
10136 /* If we can widen the comparison to match vectype do so. */
10137 if (INTEGRAL_TYPE_P (scalar_type
)
10139 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10140 TYPE_SIZE (TREE_TYPE (vectype
))))
10141 scalar_type
= build_nonstandard_integer_type
10142 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10143 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
10151 /* vectorizable_condition.
10153 Check if STMT_INFO is conditional modify expression that can be vectorized.
10154 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10155 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10158 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10160 Return true if STMT_INFO is vectorizable in this way. */
10163 vectorizable_condition (vec_info
*vinfo
,
10164 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10166 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10168 tree scalar_dest
= NULL_TREE
;
10169 tree vec_dest
= NULL_TREE
;
10170 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10171 tree then_clause
, else_clause
;
10172 tree comp_vectype
= NULL_TREE
;
10173 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10174 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10177 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10178 enum vect_def_type dts
[4]
10179 = {vect_unknown_def_type
, vect_unknown_def_type
,
10180 vect_unknown_def_type
, vect_unknown_def_type
};
10184 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10186 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10187 vec
<tree
> vec_oprnds0
= vNULL
;
10188 vec
<tree
> vec_oprnds1
= vNULL
;
10189 vec
<tree
> vec_oprnds2
= vNULL
;
10190 vec
<tree
> vec_oprnds3
= vNULL
;
10192 bool masked
= false;
10194 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10197 /* Is vectorizable conditional operation? */
10198 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10202 code
= gimple_assign_rhs_code (stmt
);
10203 if (code
!= COND_EXPR
)
10206 stmt_vec_info reduc_info
= NULL
;
10207 int reduc_index
= -1;
10208 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10210 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10213 if (STMT_SLP_TYPE (stmt_info
))
10215 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10216 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10217 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10218 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10219 || reduc_index
!= -1);
10223 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10227 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10228 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10233 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10237 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10241 gcc_assert (ncopies
>= 1);
10242 if (for_reduction
&& ncopies
> 1)
10243 return false; /* FORNOW */
10245 cond_expr
= gimple_assign_rhs1 (stmt
);
10247 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10248 &comp_vectype
, &dts
[0], vectype
)
10252 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10253 slp_tree then_slp_node
, else_slp_node
;
10254 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10255 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10257 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10258 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10261 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10264 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10267 masked
= !COMPARISON_CLASS_P (cond_expr
);
10268 vec_cmp_type
= truth_type_for (comp_vectype
);
10270 if (vec_cmp_type
== NULL_TREE
)
10273 cond_code
= TREE_CODE (cond_expr
);
10276 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10277 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10280 /* For conditional reductions, the "then" value needs to be the candidate
10281 value calculated by this iteration while the "else" value needs to be
10282 the result carried over from previous iterations. If the COND_EXPR
10283 is the other way around, we need to swap it. */
10284 bool must_invert_cmp_result
= false;
10285 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10288 must_invert_cmp_result
= true;
10291 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10292 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10293 if (new_code
== ERROR_MARK
)
10294 must_invert_cmp_result
= true;
10297 cond_code
= new_code
;
10298 /* Make sure we don't accidentally use the old condition. */
10299 cond_expr
= NULL_TREE
;
10302 std::swap (then_clause
, else_clause
);
10305 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
10307 /* Boolean values may have another representation in vectors
10308 and therefore we prefer bit operations over comparison for
10309 them (which also works for scalar masks). We store opcodes
10310 to use in bitop1 and bitop2. Statement is vectorized as
10311 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10312 depending on bitop1 and bitop2 arity. */
10316 bitop1
= BIT_NOT_EXPR
;
10317 bitop2
= BIT_AND_EXPR
;
10320 bitop1
= BIT_NOT_EXPR
;
10321 bitop2
= BIT_IOR_EXPR
;
10324 bitop1
= BIT_NOT_EXPR
;
10325 bitop2
= BIT_AND_EXPR
;
10326 std::swap (cond_expr0
, cond_expr1
);
10329 bitop1
= BIT_NOT_EXPR
;
10330 bitop2
= BIT_IOR_EXPR
;
10331 std::swap (cond_expr0
, cond_expr1
);
10334 bitop1
= BIT_XOR_EXPR
;
10337 bitop1
= BIT_XOR_EXPR
;
10338 bitop2
= BIT_NOT_EXPR
;
10343 cond_code
= SSA_NAME
;
10346 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10347 && reduction_type
== EXTRACT_LAST_REDUCTION
10348 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10350 if (dump_enabled_p ())
10351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10352 "reduction comparison operation not supported.\n");
10358 if (bitop1
!= NOP_EXPR
)
10360 machine_mode mode
= TYPE_MODE (comp_vectype
);
10363 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10364 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10367 if (bitop2
!= NOP_EXPR
)
10369 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10371 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10376 vect_cost_for_stmt kind
= vector_stmt
;
10377 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10378 /* Count one reduction-like operation per vector. */
10379 kind
= vec_to_scalar
;
10380 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10384 && (!vect_maybe_update_slp_op_vectype
10385 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10387 && !vect_maybe_update_slp_op_vectype
10388 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10389 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10390 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10392 if (dump_enabled_p ())
10393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10394 "incompatible vector types for invariants\n");
10398 if (loop_vinfo
&& for_reduction
10399 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10401 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10402 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10403 ncopies
* vec_num
, vectype
, NULL
);
10404 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10405 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10407 if (dump_enabled_p ())
10408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10409 "conditional reduction prevents the use"
10410 " of partial vectors.\n");
10411 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10415 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10416 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10424 scalar_dest
= gimple_assign_lhs (stmt
);
10425 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10426 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10428 bool swap_cond_operands
= false;
10430 /* See whether another part of the vectorized code applies a loop
10431 mask to the condition, or to its inverse. */
10433 vec_loop_masks
*masks
= NULL
;
10434 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10436 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10437 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10440 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10441 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10442 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10445 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10446 tree_code orig_code
= cond
.code
;
10447 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10448 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10450 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10451 cond_code
= cond
.code
;
10452 swap_cond_operands
= true;
10456 /* Try the inverse of the current mask. We check if the
10457 inverse mask is live and if so we generate a negate of
10458 the current mask such that we still honor NaNs. */
10459 cond
.inverted_p
= true;
10460 cond
.code
= orig_code
;
10461 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10463 bitop1
= orig_code
;
10464 bitop2
= BIT_NOT_EXPR
;
10465 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10466 cond_code
= cond
.code
;
10467 swap_cond_operands
= true;
10474 /* Handle cond expr. */
10476 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10477 cond_expr
, &vec_oprnds0
, comp_vectype
,
10478 then_clause
, &vec_oprnds2
, vectype
,
10479 reduction_type
!= EXTRACT_LAST_REDUCTION
10480 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10482 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10483 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10484 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10485 then_clause
, &vec_oprnds2
, vectype
,
10486 reduction_type
!= EXTRACT_LAST_REDUCTION
10487 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10489 /* Arguments are ready. Create the new vector stmt. */
10490 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10492 vec_then_clause
= vec_oprnds2
[i
];
10493 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10494 vec_else_clause
= vec_oprnds3
[i
];
10496 if (swap_cond_operands
)
10497 std::swap (vec_then_clause
, vec_else_clause
);
10500 vec_compare
= vec_cond_lhs
;
10503 vec_cond_rhs
= vec_oprnds1
[i
];
10504 if (bitop1
== NOP_EXPR
)
10506 gimple_seq stmts
= NULL
;
10507 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10508 vec_cond_lhs
, vec_cond_rhs
);
10509 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10513 new_temp
= make_ssa_name (vec_cmp_type
);
10515 if (bitop1
== BIT_NOT_EXPR
)
10516 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10520 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10522 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10523 if (bitop2
== NOP_EXPR
)
10524 vec_compare
= new_temp
;
10525 else if (bitop2
== BIT_NOT_EXPR
)
10527 /* Instead of doing ~x ? y : z do x ? z : y. */
10528 vec_compare
= new_temp
;
10529 std::swap (vec_then_clause
, vec_else_clause
);
10533 vec_compare
= make_ssa_name (vec_cmp_type
);
10535 = gimple_build_assign (vec_compare
, bitop2
,
10536 vec_cond_lhs
, new_temp
);
10537 vect_finish_stmt_generation (vinfo
, stmt_info
,
10543 /* If we decided to apply a loop mask to the result of the vector
10544 comparison, AND the comparison with the mask now. Later passes
10545 should then be able to reuse the AND results between mulitple
10549 for (int i = 0; i < 100; ++i)
10550 x[i] = y[i] ? z[i] : 10;
10552 results in following optimized GIMPLE:
10554 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10555 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10556 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10557 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10558 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10559 vect_iftmp.11_47, { 10, ... }>;
10561 instead of using a masked and unmasked forms of
10562 vec != { 0, ... } (masked in the MASK_LOAD,
10563 unmasked in the VEC_COND_EXPR). */
10565 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10566 in cases where that's necessary. */
10568 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10570 if (!is_gimple_val (vec_compare
))
10572 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10573 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10575 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10576 vec_compare
= vec_compare_name
;
10579 if (must_invert_cmp_result
)
10581 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10582 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10585 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10586 vec_compare
= vec_compare_name
;
10592 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10594 tree tmp2
= make_ssa_name (vec_cmp_type
);
10596 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10598 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10599 vec_compare
= tmp2
;
10604 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10606 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10607 tree lhs
= gimple_get_lhs (old_stmt
);
10608 new_stmt
= gimple_build_call_internal
10609 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10611 gimple_call_set_lhs (new_stmt
, lhs
);
10612 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10613 if (old_stmt
== gsi_stmt (*gsi
))
10614 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10617 /* In this case we're moving the definition to later in the
10618 block. That doesn't matter because the only uses of the
10619 lhs are in phi statements. */
10620 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10621 gsi_remove (&old_gsi
, true);
10622 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10627 new_temp
= make_ssa_name (vec_dest
);
10628 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10629 vec_then_clause
, vec_else_clause
);
10630 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10633 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10635 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10639 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10641 vec_oprnds0
.release ();
10642 vec_oprnds1
.release ();
10643 vec_oprnds2
.release ();
10644 vec_oprnds3
.release ();
10649 /* vectorizable_comparison.
10651 Check if STMT_INFO is comparison expression that can be vectorized.
10652 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10653 comparison, put it in VEC_STMT, and insert it at GSI.
10655 Return true if STMT_INFO is vectorizable in this way. */
10658 vectorizable_comparison (vec_info
*vinfo
,
10659 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10661 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10663 tree lhs
, rhs1
, rhs2
;
10664 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10665 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10666 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10668 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10669 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10671 poly_uint64 nunits
;
10673 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10675 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10676 vec
<tree
> vec_oprnds0
= vNULL
;
10677 vec
<tree
> vec_oprnds1
= vNULL
;
10681 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10684 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10687 mask_type
= vectype
;
10688 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10693 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10695 gcc_assert (ncopies
>= 1);
10696 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10699 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10703 code
= gimple_assign_rhs_code (stmt
);
10705 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10708 slp_tree slp_rhs1
, slp_rhs2
;
10709 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10710 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10713 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10714 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10717 if (vectype1
&& vectype2
10718 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10719 TYPE_VECTOR_SUBPARTS (vectype2
)))
10722 vectype
= vectype1
? vectype1
: vectype2
;
10724 /* Invariant comparison. */
10727 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10728 vectype
= mask_type
;
10730 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10732 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10735 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10738 /* Can't compare mask and non-mask types. */
10739 if (vectype1
&& vectype2
10740 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10743 /* Boolean values may have another representation in vectors
10744 and therefore we prefer bit operations over comparison for
10745 them (which also works for scalar masks). We store opcodes
10746 to use in bitop1 and bitop2. Statement is vectorized as
10747 BITOP2 (rhs1 BITOP1 rhs2) or
10748 rhs1 BITOP2 (BITOP1 rhs2)
10749 depending on bitop1 and bitop2 arity. */
10750 bool swap_p
= false;
10751 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10753 if (code
== GT_EXPR
)
10755 bitop1
= BIT_NOT_EXPR
;
10756 bitop2
= BIT_AND_EXPR
;
10758 else if (code
== GE_EXPR
)
10760 bitop1
= BIT_NOT_EXPR
;
10761 bitop2
= BIT_IOR_EXPR
;
10763 else if (code
== LT_EXPR
)
10765 bitop1
= BIT_NOT_EXPR
;
10766 bitop2
= BIT_AND_EXPR
;
10769 else if (code
== LE_EXPR
)
10771 bitop1
= BIT_NOT_EXPR
;
10772 bitop2
= BIT_IOR_EXPR
;
10777 bitop1
= BIT_XOR_EXPR
;
10778 if (code
== EQ_EXPR
)
10779 bitop2
= BIT_NOT_EXPR
;
10785 if (bitop1
== NOP_EXPR
)
10787 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10792 machine_mode mode
= TYPE_MODE (vectype
);
10795 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10796 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10799 if (bitop2
!= NOP_EXPR
)
10801 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10802 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10807 /* Put types on constant and invariant SLP children. */
10809 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10810 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10812 if (dump_enabled_p ())
10813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10814 "incompatible vector types for invariants\n");
10818 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10819 vect_model_simple_cost (vinfo
, stmt_info
,
10820 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10821 dts
, ndts
, slp_node
, cost_vec
);
10828 lhs
= gimple_assign_lhs (stmt
);
10829 mask
= vect_create_destination_var (lhs
, mask_type
);
10831 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10832 rhs1
, &vec_oprnds0
, vectype
,
10833 rhs2
, &vec_oprnds1
, vectype
);
10835 std::swap (vec_oprnds0
, vec_oprnds1
);
10837 /* Arguments are ready. Create the new vector stmt. */
10838 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10841 vec_rhs2
= vec_oprnds1
[i
];
10843 new_temp
= make_ssa_name (mask
);
10844 if (bitop1
== NOP_EXPR
)
10846 new_stmt
= gimple_build_assign (new_temp
, code
,
10847 vec_rhs1
, vec_rhs2
);
10848 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10852 if (bitop1
== BIT_NOT_EXPR
)
10853 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10855 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10857 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10858 if (bitop2
!= NOP_EXPR
)
10860 tree res
= make_ssa_name (mask
);
10861 if (bitop2
== BIT_NOT_EXPR
)
10862 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10864 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10866 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10870 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10872 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10876 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10878 vec_oprnds0
.release ();
10879 vec_oprnds1
.release ();
10884 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10885 can handle all live statements in the node. Otherwise return true
10886 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10887 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10890 can_vectorize_live_stmts (vec_info
*vinfo
,
10891 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10892 slp_tree slp_node
, slp_instance slp_node_instance
,
10894 stmt_vector_for_cost
*cost_vec
)
10898 stmt_vec_info slp_stmt_info
;
10900 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10902 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10903 && !vectorizable_live_operation (vinfo
,
10904 slp_stmt_info
, gsi
, slp_node
,
10905 slp_node_instance
, i
,
10906 vec_stmt_p
, cost_vec
))
10910 else if (STMT_VINFO_LIVE_P (stmt_info
)
10911 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10912 slp_node
, slp_node_instance
, -1,
10913 vec_stmt_p
, cost_vec
))
10919 /* Make sure the statement is vectorizable. */
10922 vect_analyze_stmt (vec_info
*vinfo
,
10923 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10924 slp_tree node
, slp_instance node_instance
,
10925 stmt_vector_for_cost
*cost_vec
)
10927 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10928 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10930 gimple_seq pattern_def_seq
;
10932 if (dump_enabled_p ())
10933 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10936 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10937 return opt_result::failure_at (stmt_info
->stmt
,
10939 " stmt has volatile operands: %G\n",
10942 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10944 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10946 gimple_stmt_iterator si
;
10948 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10950 stmt_vec_info pattern_def_stmt_info
10951 = vinfo
->lookup_stmt (gsi_stmt (si
));
10952 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10953 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10955 /* Analyze def stmt of STMT if it's a pattern stmt. */
10956 if (dump_enabled_p ())
10957 dump_printf_loc (MSG_NOTE
, vect_location
,
10958 "==> examining pattern def statement: %G",
10959 pattern_def_stmt_info
->stmt
);
10962 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10963 need_to_vectorize
, node
, node_instance
,
10971 /* Skip stmts that do not need to be vectorized. In loops this is expected
10973 - the COND_EXPR which is the loop exit condition
10974 - any LABEL_EXPRs in the loop
10975 - computations that are used only for array indexing or loop control.
10976 In basic blocks we only analyze statements that are a part of some SLP
10977 instance, therefore, all the statements are relevant.
10979 Pattern statement needs to be analyzed instead of the original statement
10980 if the original statement is not relevant. Otherwise, we analyze both
10981 statements. In basic blocks we are called from some SLP instance
10982 traversal, don't analyze pattern stmts instead, the pattern stmts
10983 already will be part of SLP instance. */
10985 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10986 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10987 && !STMT_VINFO_LIVE_P (stmt_info
))
10989 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10990 && pattern_stmt_info
10991 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10992 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10994 /* Analyze PATTERN_STMT instead of the original stmt. */
10995 stmt_info
= pattern_stmt_info
;
10996 if (dump_enabled_p ())
10997 dump_printf_loc (MSG_NOTE
, vect_location
,
10998 "==> examining pattern statement: %G",
11003 if (dump_enabled_p ())
11004 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
11006 return opt_result::success ();
11009 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
11011 && pattern_stmt_info
11012 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
11013 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
11015 /* Analyze PATTERN_STMT too. */
11016 if (dump_enabled_p ())
11017 dump_printf_loc (MSG_NOTE
, vect_location
,
11018 "==> examining pattern statement: %G",
11019 pattern_stmt_info
->stmt
);
11022 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
11023 node_instance
, cost_vec
);
11028 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
11030 case vect_internal_def
:
11033 case vect_reduction_def
:
11034 case vect_nested_cycle
:
11035 gcc_assert (!bb_vinfo
11036 && (relevance
== vect_used_in_outer
11037 || relevance
== vect_used_in_outer_by_reduction
11038 || relevance
== vect_used_by_reduction
11039 || relevance
== vect_unused_in_scope
11040 || relevance
== vect_used_only_live
));
11043 case vect_induction_def
:
11044 gcc_assert (!bb_vinfo
);
11047 case vect_constant_def
:
11048 case vect_external_def
:
11049 case vect_unknown_def_type
:
11051 gcc_unreachable ();
11054 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11056 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
11058 if (STMT_VINFO_RELEVANT_P (stmt_info
))
11060 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
11061 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
11062 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
11063 *need_to_vectorize
= true;
11066 if (PURE_SLP_STMT (stmt_info
) && !node
)
11068 if (dump_enabled_p ())
11069 dump_printf_loc (MSG_NOTE
, vect_location
,
11070 "handled only by SLP analysis\n");
11071 return opt_result::success ();
11076 && (STMT_VINFO_RELEVANT_P (stmt_info
)
11077 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
11078 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11079 -mveclibabi= takes preference over library functions with
11080 the simd attribute. */
11081 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11082 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
11084 || vectorizable_conversion (vinfo
, stmt_info
,
11085 NULL
, NULL
, node
, cost_vec
)
11086 || vectorizable_operation (vinfo
, stmt_info
,
11087 NULL
, NULL
, node
, cost_vec
)
11088 || vectorizable_assignment (vinfo
, stmt_info
,
11089 NULL
, NULL
, node
, cost_vec
)
11090 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11091 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11092 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11093 node
, node_instance
, cost_vec
)
11094 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11095 NULL
, node
, cost_vec
)
11096 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11097 || vectorizable_condition (vinfo
, stmt_info
,
11098 NULL
, NULL
, node
, cost_vec
)
11099 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11101 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11102 stmt_info
, NULL
, node
));
11106 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11107 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11108 NULL
, NULL
, node
, cost_vec
)
11109 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11111 || vectorizable_shift (vinfo
, stmt_info
,
11112 NULL
, NULL
, node
, cost_vec
)
11113 || vectorizable_operation (vinfo
, stmt_info
,
11114 NULL
, NULL
, node
, cost_vec
)
11115 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11117 || vectorizable_load (vinfo
, stmt_info
,
11118 NULL
, NULL
, node
, cost_vec
)
11119 || vectorizable_store (vinfo
, stmt_info
,
11120 NULL
, NULL
, node
, cost_vec
)
11121 || vectorizable_condition (vinfo
, stmt_info
,
11122 NULL
, NULL
, node
, cost_vec
)
11123 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11125 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11129 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11132 return opt_result::failure_at (stmt_info
->stmt
,
11134 " relevant stmt not supported: %G",
11137 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11138 need extra handling, except for vectorizable reductions. */
11140 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11141 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11142 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11143 stmt_info
, NULL
, node
, node_instance
,
11145 return opt_result::failure_at (stmt_info
->stmt
,
11147 " live stmt not supported: %G",
11150 return opt_result::success ();
11154 /* Function vect_transform_stmt.
11156 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11159 vect_transform_stmt (vec_info
*vinfo
,
11160 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11161 slp_tree slp_node
, slp_instance slp_node_instance
)
11163 bool is_store
= false;
11164 gimple
*vec_stmt
= NULL
;
11167 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11169 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11171 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11173 switch (STMT_VINFO_TYPE (stmt_info
))
11175 case type_demotion_vec_info_type
:
11176 case type_promotion_vec_info_type
:
11177 case type_conversion_vec_info_type
:
11178 done
= vectorizable_conversion (vinfo
, stmt_info
,
11179 gsi
, &vec_stmt
, slp_node
, NULL
);
11183 case induc_vec_info_type
:
11184 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11185 stmt_info
, &vec_stmt
, slp_node
,
11190 case shift_vec_info_type
:
11191 done
= vectorizable_shift (vinfo
, stmt_info
,
11192 gsi
, &vec_stmt
, slp_node
, NULL
);
11196 case op_vec_info_type
:
11197 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11202 case assignment_vec_info_type
:
11203 done
= vectorizable_assignment (vinfo
, stmt_info
,
11204 gsi
, &vec_stmt
, slp_node
, NULL
);
11208 case load_vec_info_type
:
11209 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11214 case store_vec_info_type
:
11215 done
= vectorizable_store (vinfo
, stmt_info
,
11216 gsi
, &vec_stmt
, slp_node
, NULL
);
11218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
11220 /* In case of interleaving, the whole chain is vectorized when the
11221 last store in the chain is reached. Store stmts before the last
11222 one are skipped, and there vec_stmt_info shouldn't be freed
11224 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11225 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11232 case condition_vec_info_type
:
11233 done
= vectorizable_condition (vinfo
, stmt_info
,
11234 gsi
, &vec_stmt
, slp_node
, NULL
);
11238 case comparison_vec_info_type
:
11239 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11244 case call_vec_info_type
:
11245 done
= vectorizable_call (vinfo
, stmt_info
,
11246 gsi
, &vec_stmt
, slp_node
, NULL
);
11249 case call_simd_clone_vec_info_type
:
11250 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11254 case reduc_vec_info_type
:
11255 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11256 gsi
, &vec_stmt
, slp_node
);
11260 case cycle_phi_info_type
:
11261 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11262 &vec_stmt
, slp_node
, slp_node_instance
);
11266 case lc_phi_info_type
:
11267 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11268 stmt_info
, &vec_stmt
, slp_node
);
11272 case phi_info_type
:
11273 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11278 if (!STMT_VINFO_LIVE_P (stmt_info
))
11280 if (dump_enabled_p ())
11281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11282 "stmt not supported.\n");
11283 gcc_unreachable ();
11288 if (!slp_node
&& vec_stmt
)
11289 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11291 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11293 /* Handle stmts whose DEF is used outside the loop-nest that is
11294 being vectorized. */
11295 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11296 slp_node_instance
, true, NULL
);
11301 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11307 /* Remove a group of stores (for SLP or interleaving), free their
11311 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11313 stmt_vec_info next_stmt_info
= first_stmt_info
;
11315 while (next_stmt_info
)
11317 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11318 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11319 /* Free the attached stmt_vec_info and remove the stmt. */
11320 vinfo
->remove_stmt (next_stmt_info
);
11321 next_stmt_info
= tmp
;
11325 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11326 elements of type SCALAR_TYPE, or null if the target doesn't support
11329 If NUNITS is zero, return a vector type that contains elements of
11330 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11332 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11333 for this vectorization region and want to "autodetect" the best choice.
11334 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11335 and we want the new type to be interoperable with it. PREVAILING_MODE
11336 in this case can be a scalar integer mode or a vector mode; when it
11337 is a vector mode, the function acts like a tree-level version of
11338 related_vector_mode. */
11341 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11342 tree scalar_type
, poly_uint64 nunits
)
11344 tree orig_scalar_type
= scalar_type
;
11345 scalar_mode inner_mode
;
11346 machine_mode simd_mode
;
11349 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11350 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11353 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
11355 /* For vector types of elements whose mode precision doesn't
11356 match their types precision we use a element type of mode
11357 precision. The vectorization routines will have to make sure
11358 they support the proper result truncation/extension.
11359 We also make sure to build vector types with INTEGER_TYPE
11360 component type only. */
11361 if (INTEGRAL_TYPE_P (scalar_type
)
11362 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11363 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11364 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11365 TYPE_UNSIGNED (scalar_type
));
11367 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11368 When the component mode passes the above test simply use a type
11369 corresponding to that mode. The theory is that any use that
11370 would cause problems with this will disable vectorization anyway. */
11371 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11372 && !INTEGRAL_TYPE_P (scalar_type
))
11373 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11375 /* We can't build a vector type of elements with alignment bigger than
11377 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11378 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11379 TYPE_UNSIGNED (scalar_type
));
11381 /* If we felt back to using the mode fail if there was
11382 no scalar type for it. */
11383 if (scalar_type
== NULL_TREE
)
11386 /* If no prevailing mode was supplied, use the mode the target prefers.
11387 Otherwise lookup a vector mode based on the prevailing mode. */
11388 if (prevailing_mode
== VOIDmode
)
11390 gcc_assert (known_eq (nunits
, 0U));
11391 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11392 if (SCALAR_INT_MODE_P (simd_mode
))
11394 /* Traditional behavior is not to take the integer mode
11395 literally, but simply to use it as a way of determining
11396 the vector size. It is up to mode_for_vector to decide
11397 what the TYPE_MODE should be.
11399 Note that nunits == 1 is allowed in order to support single
11400 element vector types. */
11401 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11402 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11406 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11407 || !related_vector_mode (prevailing_mode
,
11408 inner_mode
, nunits
).exists (&simd_mode
))
11410 /* Fall back to using mode_for_vector, mostly in the hope of being
11411 able to use an integer mode. */
11412 if (known_eq (nunits
, 0U)
11413 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11416 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11420 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11422 /* In cases where the mode was chosen by mode_for_vector, check that
11423 the target actually supports the chosen mode, or that it at least
11424 allows the vector mode to be replaced by a like-sized integer. */
11425 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11426 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
11429 /* Re-attach the address-space qualifier if we canonicalized the scalar
11431 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11432 return build_qualified_type
11433 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
11438 /* Function get_vectype_for_scalar_type.
11440 Returns the vector type corresponding to SCALAR_TYPE as supported
11441 by the target. If GROUP_SIZE is nonzero and we're performing BB
11442 vectorization, make sure that the number of elements in the vector
11443 is no bigger than GROUP_SIZE. */
11446 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11447 unsigned int group_size
)
11449 /* For BB vectorization, we should always have a group size once we've
11450 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11451 are tentative requests during things like early data reference
11452 analysis and pattern recognition. */
11453 if (is_a
<bb_vec_info
> (vinfo
))
11454 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11458 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11460 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11461 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11463 /* Register the natural choice of vector type, before the group size
11464 has been applied. */
11466 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11468 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11469 try again with an explicit number of elements. */
11472 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
11474 /* Start with the biggest number of units that fits within
11475 GROUP_SIZE and halve it until we find a valid vector type.
11476 Usually either the first attempt will succeed or all will
11477 fail (in the latter case because GROUP_SIZE is too small
11478 for the target), but it's possible that a target could have
11479 a hole between supported vector types.
11481 If GROUP_SIZE is not a power of 2, this has the effect of
11482 trying the largest power of 2 that fits within the group,
11483 even though the group is not a multiple of that vector size.
11484 The BB vectorizer will then try to carve up the group into
11486 unsigned int nunits
= 1 << floor_log2 (group_size
);
11489 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11490 scalar_type
, nunits
);
11493 while (nunits
> 1 && !vectype
);
11499 /* Return the vector type corresponding to SCALAR_TYPE as supported
11500 by the target. NODE, if nonnull, is the SLP tree node that will
11501 use the returned vector type. */
11504 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11506 unsigned int group_size
= 0;
11508 group_size
= SLP_TREE_LANES (node
);
11509 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11512 /* Function get_mask_type_for_scalar_type.
11514 Returns the mask type corresponding to a result of comparison
11515 of vectors of specified SCALAR_TYPE as supported by target.
11516 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11517 make sure that the number of elements in the vector is no bigger
11518 than GROUP_SIZE. */
11521 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11522 unsigned int group_size
)
11524 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11529 return truth_type_for (vectype
);
11532 /* Function get_same_sized_vectype
11534 Returns a vector type corresponding to SCALAR_TYPE of size
11535 VECTOR_TYPE if supported by the target. */
11538 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11540 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11541 return truth_type_for (vector_type
);
11543 poly_uint64 nunits
;
11544 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11545 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11548 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11549 scalar_type
, nunits
);
11552 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11553 would not change the chosen vector modes. */
11556 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11558 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11559 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11560 if (!VECTOR_MODE_P (*i
)
11561 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11566 /* Function vect_is_simple_use.
11569 VINFO - the vect info of the loop or basic block that is being vectorized.
11570 OPERAND - operand in the loop or bb.
11572 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11573 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11574 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11575 the definition could be anywhere in the function
11576 DT - the type of definition
11578 Returns whether a stmt with OPERAND can be vectorized.
11579 For loops, supportable operands are constants, loop invariants, and operands
11580 that are defined by the current iteration of the loop. Unsupportable
11581 operands are those that are defined by a previous iteration of the loop (as
11582 is the case in reduction/induction computations).
11583 For basic blocks, supportable operands are constants and bb invariants.
11584 For now, operands defined outside the basic block are not supported. */
11587 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11588 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11590 if (def_stmt_info_out
)
11591 *def_stmt_info_out
= NULL
;
11593 *def_stmt_out
= NULL
;
11594 *dt
= vect_unknown_def_type
;
11596 if (dump_enabled_p ())
11598 dump_printf_loc (MSG_NOTE
, vect_location
,
11599 "vect_is_simple_use: operand ");
11600 if (TREE_CODE (operand
) == SSA_NAME
11601 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11602 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11604 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11607 if (CONSTANT_CLASS_P (operand
))
11608 *dt
= vect_constant_def
;
11609 else if (is_gimple_min_invariant (operand
))
11610 *dt
= vect_external_def
;
11611 else if (TREE_CODE (operand
) != SSA_NAME
)
11612 *dt
= vect_unknown_def_type
;
11613 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11614 *dt
= vect_external_def
;
11617 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11618 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11620 *dt
= vect_external_def
;
11623 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11624 def_stmt
= stmt_vinfo
->stmt
;
11625 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11626 if (def_stmt_info_out
)
11627 *def_stmt_info_out
= stmt_vinfo
;
11630 *def_stmt_out
= def_stmt
;
11633 if (dump_enabled_p ())
11635 dump_printf (MSG_NOTE
, ", type of def: ");
11638 case vect_uninitialized_def
:
11639 dump_printf (MSG_NOTE
, "uninitialized\n");
11641 case vect_constant_def
:
11642 dump_printf (MSG_NOTE
, "constant\n");
11644 case vect_external_def
:
11645 dump_printf (MSG_NOTE
, "external\n");
11647 case vect_internal_def
:
11648 dump_printf (MSG_NOTE
, "internal\n");
11650 case vect_induction_def
:
11651 dump_printf (MSG_NOTE
, "induction\n");
11653 case vect_reduction_def
:
11654 dump_printf (MSG_NOTE
, "reduction\n");
11656 case vect_double_reduction_def
:
11657 dump_printf (MSG_NOTE
, "double reduction\n");
11659 case vect_nested_cycle
:
11660 dump_printf (MSG_NOTE
, "nested cycle\n");
11662 case vect_unknown_def_type
:
11663 dump_printf (MSG_NOTE
, "unknown\n");
11668 if (*dt
== vect_unknown_def_type
)
11670 if (dump_enabled_p ())
11671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11672 "Unsupported pattern.\n");
11679 /* Function vect_is_simple_use.
11681 Same as vect_is_simple_use but also determines the vector operand
11682 type of OPERAND and stores it to *VECTYPE. If the definition of
11683 OPERAND is vect_uninitialized_def, vect_constant_def or
11684 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11685 is responsible to compute the best suited vector type for the
11689 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11690 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11691 gimple
**def_stmt_out
)
11693 stmt_vec_info def_stmt_info
;
11695 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11699 *def_stmt_out
= def_stmt
;
11700 if (def_stmt_info_out
)
11701 *def_stmt_info_out
= def_stmt_info
;
11703 /* Now get a vector type if the def is internal, otherwise supply
11704 NULL_TREE and leave it up to the caller to figure out a proper
11705 type for the use stmt. */
11706 if (*dt
== vect_internal_def
11707 || *dt
== vect_induction_def
11708 || *dt
== vect_reduction_def
11709 || *dt
== vect_double_reduction_def
11710 || *dt
== vect_nested_cycle
)
11712 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11713 gcc_assert (*vectype
!= NULL_TREE
);
11714 if (dump_enabled_p ())
11715 dump_printf_loc (MSG_NOTE
, vect_location
,
11716 "vect_is_simple_use: vectype %T\n", *vectype
);
11718 else if (*dt
== vect_uninitialized_def
11719 || *dt
== vect_constant_def
11720 || *dt
== vect_external_def
)
11721 *vectype
= NULL_TREE
;
11723 gcc_unreachable ();
11728 /* Function vect_is_simple_use.
11730 Same as vect_is_simple_use but determines the operand by operand
11731 position OPERAND from either STMT or SLP_NODE, filling in *OP
11732 and *SLP_DEF (when SLP_NODE is not NULL). */
11735 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11736 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11737 enum vect_def_type
*dt
,
11738 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
11742 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11744 *vectype
= SLP_TREE_VECTYPE (child
);
11745 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11747 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11748 return vect_is_simple_use (*op
, vinfo
, dt
, def_stmt_info_out
);
11752 if (def_stmt_info_out
)
11753 *def_stmt_info_out
= NULL
;
11754 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11755 *dt
= SLP_TREE_DEF_TYPE (child
);
11762 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11764 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11765 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11768 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11770 *op
= gimple_op (ass
, operand
);
11772 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11773 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
11775 *op
= gimple_op (ass
, operand
+ 1);
11777 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11778 *op
= gimple_call_arg (call
, operand
);
11780 gcc_unreachable ();
11781 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11785 /* If OP is not NULL and is external or constant update its vector
11786 type with VECTYPE. Returns true if successful or false if not,
11787 for example when conflicting vector types are present. */
11790 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11792 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11794 if (SLP_TREE_VECTYPE (op
))
11795 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11796 SLP_TREE_VECTYPE (op
) = vectype
;
11800 /* Function supportable_widening_operation
11802 Check whether an operation represented by the code CODE is a
11803 widening operation that is supported by the target platform in
11804 vector form (i.e., when operating on arguments of type VECTYPE_IN
11805 producing a result of type VECTYPE_OUT).
11807 Widening operations we currently support are NOP (CONVERT), FLOAT,
11808 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11809 are supported by the target platform either directly (via vector
11810 tree-codes), or via target builtins.
11813 - CODE1 and CODE2 are codes of vector operations to be used when
11814 vectorizing the operation, if available.
11815 - MULTI_STEP_CVT determines the number of required intermediate steps in
11816 case of multi-step conversion (like char->short->int - in that case
11817 MULTI_STEP_CVT will be 1).
11818 - INTERM_TYPES contains the intermediate type required to perform the
11819 widening operation (short in the above example). */
11822 supportable_widening_operation (vec_info
*vinfo
,
11823 enum tree_code code
, stmt_vec_info stmt_info
,
11824 tree vectype_out
, tree vectype_in
,
11825 enum tree_code
*code1
, enum tree_code
*code2
,
11826 int *multi_step_cvt
,
11827 vec
<tree
> *interm_types
)
11829 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11830 class loop
*vect_loop
= NULL
;
11831 machine_mode vec_mode
;
11832 enum insn_code icode1
, icode2
;
11833 optab optab1
, optab2
;
11834 tree vectype
= vectype_in
;
11835 tree wide_vectype
= vectype_out
;
11836 enum tree_code c1
, c2
;
11838 tree prev_type
, intermediate_type
;
11839 machine_mode intermediate_mode
, prev_mode
;
11840 optab optab3
, optab4
;
11842 *multi_step_cvt
= 0;
11844 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11848 case WIDEN_MULT_EXPR
:
11849 /* The result of a vectorized widening operation usually requires
11850 two vectors (because the widened results do not fit into one vector).
11851 The generated vector results would normally be expected to be
11852 generated in the same order as in the original scalar computation,
11853 i.e. if 8 results are generated in each vector iteration, they are
11854 to be organized as follows:
11855 vect1: [res1,res2,res3,res4],
11856 vect2: [res5,res6,res7,res8].
11858 However, in the special case that the result of the widening
11859 operation is used in a reduction computation only, the order doesn't
11860 matter (because when vectorizing a reduction we change the order of
11861 the computation). Some targets can take advantage of this and
11862 generate more efficient code. For example, targets like Altivec,
11863 that support widen_mult using a sequence of {mult_even,mult_odd}
11864 generate the following vectors:
11865 vect1: [res1,res3,res5,res7],
11866 vect2: [res2,res4,res6,res8].
11868 When vectorizing outer-loops, we execute the inner-loop sequentially
11869 (each vectorized inner-loop iteration contributes to VF outer-loop
11870 iterations in parallel). We therefore don't allow to change the
11871 order of the computation in the inner-loop during outer-loop
11873 /* TODO: Another case in which order doesn't *really* matter is when we
11874 widen and then contract again, e.g. (short)((int)x * y >> 8).
11875 Normally, pack_trunc performs an even/odd permute, whereas the
11876 repack from an even/odd expansion would be an interleave, which
11877 would be significantly simpler for e.g. AVX2. */
11878 /* In any case, in order to avoid duplicating the code below, recurse
11879 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11880 are properly set up for the caller. If we fail, we'll continue with
11881 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11883 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11884 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11885 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11886 stmt_info
, vectype_out
,
11887 vectype_in
, code1
, code2
,
11888 multi_step_cvt
, interm_types
))
11890 /* Elements in a vector with vect_used_by_reduction property cannot
11891 be reordered if the use chain with this property does not have the
11892 same operation. One such an example is s += a * b, where elements
11893 in a and b cannot be reordered. Here we check if the vector defined
11894 by STMT is only directly used in the reduction statement. */
11895 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11896 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11898 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11901 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11902 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11905 case DOT_PROD_EXPR
:
11906 c1
= DOT_PROD_EXPR
;
11907 c2
= DOT_PROD_EXPR
;
11915 case VEC_WIDEN_MULT_EVEN_EXPR
:
11916 /* Support the recursion induced just above. */
11917 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11918 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11921 case WIDEN_LSHIFT_EXPR
:
11922 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11923 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11926 case WIDEN_PLUS_EXPR
:
11927 c1
= VEC_WIDEN_PLUS_LO_EXPR
;
11928 c2
= VEC_WIDEN_PLUS_HI_EXPR
;
11931 case WIDEN_MINUS_EXPR
:
11932 c1
= VEC_WIDEN_MINUS_LO_EXPR
;
11933 c2
= VEC_WIDEN_MINUS_HI_EXPR
;
11937 c1
= VEC_UNPACK_LO_EXPR
;
11938 c2
= VEC_UNPACK_HI_EXPR
;
11942 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11943 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11946 case FIX_TRUNC_EXPR
:
11947 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11948 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11952 gcc_unreachable ();
11955 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11956 std::swap (c1
, c2
);
11958 if (code
== FIX_TRUNC_EXPR
)
11960 /* The signedness is determined from output operand. */
11961 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11962 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11964 else if (CONVERT_EXPR_CODE_P (code
)
11965 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11966 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11967 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11968 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11970 /* If the input and result modes are the same, a different optab
11971 is needed where we pass in the number of units in vectype. */
11972 optab1
= vec_unpacks_sbool_lo_optab
;
11973 optab2
= vec_unpacks_sbool_hi_optab
;
11977 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11978 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11981 if (!optab1
|| !optab2
)
11984 vec_mode
= TYPE_MODE (vectype
);
11985 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11986 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11992 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11993 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11995 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11997 /* For scalar masks we may have different boolean
11998 vector types having the same QImode. Thus we
11999 add additional check for elements number. */
12000 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
12001 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12005 /* Check if it's a multi-step conversion that can be done using intermediate
12008 prev_type
= vectype
;
12009 prev_mode
= vec_mode
;
12011 if (!CONVERT_EXPR_CODE_P (code
))
12014 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12015 intermediate steps in promotion sequence. We try
12016 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12018 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12019 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12021 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12022 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12024 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
12027 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
12028 TYPE_UNSIGNED (prev_type
));
12030 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12031 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12032 && intermediate_mode
== prev_mode
12033 && SCALAR_INT_MODE_P (prev_mode
))
12035 /* If the input and result modes are the same, a different optab
12036 is needed where we pass in the number of units in vectype. */
12037 optab3
= vec_unpacks_sbool_lo_optab
;
12038 optab4
= vec_unpacks_sbool_hi_optab
;
12042 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12043 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
12046 if (!optab3
|| !optab4
12047 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
12048 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12049 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
12050 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
12051 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
12052 == CODE_FOR_nothing
)
12053 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
12054 == CODE_FOR_nothing
))
12057 interm_types
->quick_push (intermediate_type
);
12058 (*multi_step_cvt
)++;
12060 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
12061 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
12063 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12065 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
12066 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
12070 prev_type
= intermediate_type
;
12071 prev_mode
= intermediate_mode
;
12074 interm_types
->release ();
12079 /* Function supportable_narrowing_operation
12081 Check whether an operation represented by the code CODE is a
12082 narrowing operation that is supported by the target platform in
12083 vector form (i.e., when operating on arguments of type VECTYPE_IN
12084 and producing a result of type VECTYPE_OUT).
12086 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12087 and FLOAT. This function checks if these operations are supported by
12088 the target platform directly via vector tree-codes.
12091 - CODE1 is the code of a vector operation to be used when
12092 vectorizing the operation, if available.
12093 - MULTI_STEP_CVT determines the number of required intermediate steps in
12094 case of multi-step conversion (like int->short->char - in that case
12095 MULTI_STEP_CVT will be 1).
12096 - INTERM_TYPES contains the intermediate type required to perform the
12097 narrowing operation (short in the above example). */
12100 supportable_narrowing_operation (enum tree_code code
,
12101 tree vectype_out
, tree vectype_in
,
12102 enum tree_code
*code1
, int *multi_step_cvt
,
12103 vec
<tree
> *interm_types
)
12105 machine_mode vec_mode
;
12106 enum insn_code icode1
;
12107 optab optab1
, interm_optab
;
12108 tree vectype
= vectype_in
;
12109 tree narrow_vectype
= vectype_out
;
12111 tree intermediate_type
, prev_type
;
12112 machine_mode intermediate_mode
, prev_mode
;
12116 *multi_step_cvt
= 0;
12120 c1
= VEC_PACK_TRUNC_EXPR
;
12121 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
12122 && VECTOR_BOOLEAN_TYPE_P (vectype
)
12123 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
12124 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
12125 optab1
= vec_pack_sbool_trunc_optab
;
12127 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12130 case FIX_TRUNC_EXPR
:
12131 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
12132 /* The signedness is determined from output operand. */
12133 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
12137 c1
= VEC_PACK_FLOAT_EXPR
;
12138 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
12142 gcc_unreachable ();
12148 vec_mode
= TYPE_MODE (vectype
);
12149 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
12154 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12156 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12158 /* For scalar masks we may have different boolean
12159 vector types having the same QImode. Thus we
12160 add additional check for elements number. */
12161 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
12162 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12166 if (code
== FLOAT_EXPR
)
12169 /* Check if it's a multi-step conversion that can be done using intermediate
12171 prev_mode
= vec_mode
;
12172 prev_type
= vectype
;
12173 if (code
== FIX_TRUNC_EXPR
)
12174 uns
= TYPE_UNSIGNED (vectype_out
);
12176 uns
= TYPE_UNSIGNED (vectype
);
12178 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12179 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12180 costly than signed. */
12181 if (code
== FIX_TRUNC_EXPR
&& uns
)
12183 enum insn_code icode2
;
12186 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
12188 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
12189 if (interm_optab
!= unknown_optab
12190 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
12191 && insn_data
[icode1
].operand
[0].mode
12192 == insn_data
[icode2
].operand
[0].mode
)
12195 optab1
= interm_optab
;
12200 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12201 intermediate steps in promotion sequence. We try
12202 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12203 interm_types
->create (MAX_INTERM_CVT_STEPS
);
12204 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
12206 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
12207 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
12209 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
12212 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
12213 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
12214 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
12215 && intermediate_mode
== prev_mode
12216 && SCALAR_INT_MODE_P (prev_mode
))
12217 interm_optab
= vec_pack_sbool_trunc_optab
;
12220 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
12223 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
12224 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
12225 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
12226 == CODE_FOR_nothing
))
12229 interm_types
->quick_push (intermediate_type
);
12230 (*multi_step_cvt
)++;
12232 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
12234 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12236 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
12237 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
12241 prev_mode
= intermediate_mode
;
12242 prev_type
= intermediate_type
;
12243 optab1
= interm_optab
;
12246 interm_types
->release ();
12250 /* Generate and return a vector mask of MASK_TYPE such that
12251 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12252 Add the statements to SEQ. */
12255 vect_gen_while (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12256 tree end_index
, const char *name
)
12258 tree cmp_type
= TREE_TYPE (start_index
);
12259 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
12260 cmp_type
, mask_type
,
12261 OPTIMIZE_FOR_SPEED
));
12262 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
12263 start_index
, end_index
,
12264 build_zero_cst (mask_type
));
12267 tmp
= make_temp_ssa_name (mask_type
, NULL
, name
);
12269 tmp
= make_ssa_name (mask_type
);
12270 gimple_call_set_lhs (call
, tmp
);
12271 gimple_seq_add_stmt (seq
, call
);
12275 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12276 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12279 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
12282 tree tmp
= vect_gen_while (seq
, mask_type
, start_index
, end_index
);
12283 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
12286 /* Try to compute the vector types required to vectorize STMT_INFO,
12287 returning true on success and false if vectorization isn't possible.
12288 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12289 take sure that the number of elements in the vectors is no bigger
12294 - Set *STMT_VECTYPE_OUT to:
12295 - NULL_TREE if the statement doesn't need to be vectorized;
12296 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12298 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12299 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12300 statement does not help to determine the overall number of units. */
12303 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12304 tree
*stmt_vectype_out
,
12305 tree
*nunits_vectype_out
,
12306 unsigned int group_size
)
12308 gimple
*stmt
= stmt_info
->stmt
;
12310 /* For BB vectorization, we should always have a group size once we've
12311 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12312 are tentative requests during things like early data reference
12313 analysis and pattern recognition. */
12314 if (is_a
<bb_vec_info
> (vinfo
))
12315 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
12319 *stmt_vectype_out
= NULL_TREE
;
12320 *nunits_vectype_out
= NULL_TREE
;
12322 if (gimple_get_lhs (stmt
) == NULL_TREE
12323 /* MASK_STORE has no lhs, but is ok. */
12324 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12326 if (is_a
<gcall
*> (stmt
))
12328 /* Ignore calls with no lhs. These must be calls to
12329 #pragma omp simd functions, and what vectorization factor
12330 it really needs can't be determined until
12331 vectorizable_simd_clone_call. */
12332 if (dump_enabled_p ())
12333 dump_printf_loc (MSG_NOTE
, vect_location
,
12334 "defer to SIMD clone analysis.\n");
12335 return opt_result::success ();
12338 return opt_result::failure_at (stmt
,
12339 "not vectorized: irregular stmt.%G", stmt
);
12343 tree scalar_type
= NULL_TREE
;
12344 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
12346 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12347 if (dump_enabled_p ())
12348 dump_printf_loc (MSG_NOTE
, vect_location
,
12349 "precomputed vectype: %T\n", vectype
);
12351 else if (vect_use_mask_type_p (stmt_info
))
12353 unsigned int precision
= stmt_info
->mask_precision
;
12354 scalar_type
= build_nonstandard_integer_type (precision
, 1);
12355 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
12357 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
12358 " data-type %T\n", scalar_type
);
12359 if (dump_enabled_p ())
12360 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12364 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
12365 scalar_type
= TREE_TYPE (DR_REF (dr
));
12366 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
12367 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
12369 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
12371 if (dump_enabled_p ())
12374 dump_printf_loc (MSG_NOTE
, vect_location
,
12375 "get vectype for scalar type (group size %d):"
12376 " %T\n", group_size
, scalar_type
);
12378 dump_printf_loc (MSG_NOTE
, vect_location
,
12379 "get vectype for scalar type: %T\n", scalar_type
);
12381 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
12383 return opt_result::failure_at (stmt
,
12385 " unsupported data-type %T\n",
12388 if (dump_enabled_p ())
12389 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
12392 if (scalar_type
&& VECTOR_MODE_P (TYPE_MODE (scalar_type
)))
12393 return opt_result::failure_at (stmt
,
12394 "not vectorized: vector stmt in loop:%G",
12397 *stmt_vectype_out
= vectype
;
12399 /* Don't try to compute scalar types if the stmt produces a boolean
12400 vector; use the existing vector type instead. */
12401 tree nunits_vectype
= vectype
;
12402 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
12404 /* The number of units is set according to the smallest scalar
12405 type (or the largest vector size, but we only support one
12406 vector size per vectorization). */
12407 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
12408 TREE_TYPE (vectype
));
12409 if (scalar_type
!= TREE_TYPE (vectype
))
12411 if (dump_enabled_p ())
12412 dump_printf_loc (MSG_NOTE
, vect_location
,
12413 "get vectype for smallest scalar type: %T\n",
12415 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
12417 if (!nunits_vectype
)
12418 return opt_result::failure_at
12419 (stmt
, "not vectorized: unsupported data-type %T\n",
12421 if (dump_enabled_p ())
12422 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
12427 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
12428 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)))
12429 return opt_result::failure_at (stmt
,
12430 "Not vectorized: Incompatible number "
12431 "of vector subparts between %T and %T\n",
12432 nunits_vectype
, *stmt_vectype_out
);
12434 if (dump_enabled_p ())
12436 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
12437 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
12438 dump_printf (MSG_NOTE
, "\n");
12441 *nunits_vectype_out
= nunits_vectype
;
12442 return opt_result::success ();
12445 /* Generate and return statement sequence that sets vector length LEN that is:
12447 min_of_start_and_end = min (START_INDEX, END_INDEX);
12448 left_len = END_INDEX - min_of_start_and_end;
12449 rhs = min (left_len, LEN_LIMIT);
12452 Note: the cost of the code generated by this function is modeled
12453 by vect_estimate_min_profitable_iters, so changes here may need
12454 corresponding changes there. */
12457 vect_gen_len (tree len
, tree start_index
, tree end_index
, tree len_limit
)
12459 gimple_seq stmts
= NULL
;
12460 tree len_type
= TREE_TYPE (len
);
12461 gcc_assert (TREE_TYPE (start_index
) == len_type
);
12463 tree min
= gimple_build (&stmts
, MIN_EXPR
, len_type
, start_index
, end_index
);
12464 tree left_len
= gimple_build (&stmts
, MINUS_EXPR
, len_type
, end_index
, min
);
12465 tree rhs
= gimple_build (&stmts
, MIN_EXPR
, len_type
, left_len
, len_limit
);
12466 gimple
* stmt
= gimple_build_assign (len
, rhs
);
12467 gimple_seq_add_stmt (&stmts
, stmt
);