/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
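
/* For instance, when vectorizing the outer loop of a two-level nest,
   stmt_in_inner_loop_p holds for statements in the inner-loop body but not
   for statements in the outer-loop header or latch.  */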
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
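
/* An illustrative use of record_stmt_cost, charging NCOPIES copies of a
   vector statement to the loop body (the entry is pushed to BODY_COST_VEC
   and only a preliminary estimate is returned):

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				      stmt_info, vectype, 0, vect_body);  */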
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }

  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
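
/* E.g. a store is relevant because it has a vdef, while a computation whose
   only uses are in loop-exit phis is live (used outside the loop) but not
   necessarily relevant inside it.  */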
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
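
/* E.g. for the store "a[i_1] = x_2" this returns true when USE is x_2 (the
   stored value) and false when USE is i_1, which only feeds the array
   index.  */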
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example, stmts
   that are used only for loop control or for addressing of vectorized
   data-refs are handled differently and do not need to be vectorized
   themselves.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
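
/* E.g. a VMAT_CONTIGUOUS_PERMUTE store group of size 4 with ncopies == 1 is
   costed as 1 * ceil_log2 (4) * 4 == 8 vec_perm stmts in the loop body, on
   top of the stores themselves.  */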
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
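
/* E.g. an emulated gather (gs_info->ifn == IFN_LAST and no builtin decl)
   with ncopies == 1 and 4 elements per vector is costed roughly as 4
   vec_to_scalar offset extracts, 4 scalar_load stmts and 1 vec_construct
   in the loop body.  */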
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo,
		    gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val,
		  tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
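
/* E.g. splatting a scalar _Bool VAL into a vector-boolean TYPE first builds
   an all-ones or all-zeros element (VAL ? true_val : false_val) and only
   then performs the build_vector_from_val splat.  */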
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
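
/* E.g. for a loop-invariant operand this pushes the same splatted vector def
   NCOPIES times, while for an in-loop SSA_NAME it returns the lhs of each of
   the NCOPIES vectorized copies of the defining stmt.  */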
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree
permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
		      gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
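
/* E.g. a conditional contiguous load on a target with masked loads
   (can_vec_mask_load_store_p) records an rgroup mask via
   vect_record_loop_mask; if only length-based partial vectors are available
   an rgroup length is recorded instead, and if neither is available
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P is cleared.  */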
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							  sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype)
	  || gs_info->ifn == IFN_LAST)
	continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
                                    loop_vec_info loop_vinfo, bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
                                                masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
              >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
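/* For a fixed-length vector type such as V4SI the selector built below is
   simply { 3, 2, 1, 0 }; the three-element stepped encoding also covers
   variable-length vectors, where the full selector cannot be enumerated.  */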
static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */
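/* Illustrative example: for a V4SI access with a negative step, the first
   vector access starts (4 - 1) * 4 == 12 bytes before the DR address, so
   *POFFSET is set to -12 and the loaded or stored vector is then reversed
   with a permute.  */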
static vect_memory_access_type
get_negative_load_store_type (vec_info *vinfo,
                              stmt_vec_info stmt_info, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies, poly_int64 *poffset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  /* For backward running DRs the first access in vectype actually is
     N-1 elements before the address of the DR.  */
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
              * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));

  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
  alignment_support_scheme
    = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "negative step with invariant source;"
                         " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step and reversing not supported.\n");
      *poffset = 0;
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */
static tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed from NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the return vector.  It first checks
   whether the target supports a pieces-sized vector mode for construction;
   if not, it then checks for a pieces-sized scalar mode.  It returns
   NULL_TREE if no usable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
  gcc_assert (VECTOR_TYPE_P (vtype));
  gcc_assert (known_gt (nelts, 0U));

  machine_mode vmode = TYPE_MODE (vtype);
  if (!VECTOR_MODE_P (vmode))
    return NULL_TREE;

  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
  unsigned int pbsize;
  if (constant_multiple_p (vbsize, nelts, &pbsize))
    {
      /* First check if vec_init optab supports construction from
         vector pieces directly.  */
      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, vmode, rmode)
              != CODE_FOR_nothing))
        {
          *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
          return vtype;
        }

      /* Otherwise check if an integer type of the same piece size exists
         and if vec_init optab supports construction from it directly.  */
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
          && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, rmode, elmode)
              != CODE_FOR_nothing))
        {
          *ptype = build_nonstandard_integer_type (pbsize, 1);
          return build_vector_type (*ptype, nelts);
        }
    }

  return NULL_TREE;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
2125 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2126 tree vectype
, slp_tree slp_node
,
2127 bool masked_p
, vec_load_store_type vls_type
,
2128 vect_memory_access_type
*memory_access_type
,
2129 poly_int64
*poffset
,
2130 dr_alignment_support
*alignment_support_scheme
,
2132 gather_scatter_info
*gs_info
)
2134 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2135 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2136 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2137 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2138 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2139 bool single_element_p
= (stmt_info
== first_stmt_info
2140 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2141 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2142 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2144 /* True if the vectorized statements would access beyond the last
2145 statement in the group. */
2146 bool overrun_p
= false;
2148 /* True if we can cope with such overrun by peeling for gaps, so that
2149 there is at least one final scalar iteration after the vector loop. */
2150 bool can_overrun_p
= (!masked_p
2151 && vls_type
== VLS_LOAD
2155 /* There can only be a gap at the end of the group if the stride is
2156 known at compile time. */
2157 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2159 /* Stores can't yet have gaps. */
2160 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2164 /* For SLP vectorization we directly vectorize a subchain
2165 without permutation. */
2166 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2168 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits
, group_size
))
2175 *memory_access_type
= VMAT_STRIDED_SLP
;
2177 *memory_access_type
= VMAT_ELEMENTWISE
;
2181 overrun_p
= loop_vinfo
&& gap
!= 0;
2182 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
2194 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2196 / vect_get_scalar_dr_size (first_dr_info
)))
2199 /* If the gap splits the vector in half and the target
2200 can do half-vector operations avoid the epilogue peeling
2201 by simply loading half of the vector only. Usually
2202 the construction with an upper zero half will be elided. */
2203 dr_alignment_support alss
;
2204 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2208 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2209 vectype
, misalign
)))
2211 || alss
== dr_unaligned_supported
)
2212 && known_eq (nunits
, (group_size
- gap
) * 2)
2213 && known_eq (nunits
, group_size
)
2214 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2218 if (overrun_p
&& !can_overrun_p
)
2220 if (dump_enabled_p ())
2221 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2222 "Peeling for outer loop is not supported\n");
2225 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2228 if (single_element_p
)
2229 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2230 only correct for single element "interleaving" SLP. */
2231 *memory_access_type
= get_negative_load_store_type
2232 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2235 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 separated by the stride, until we have a complete vector.
2237 Fall back to scalar accesses if that isn't possible. */
2238 if (multiple_p (nunits
, group_size
))
2239 *memory_access_type
= VMAT_STRIDED_SLP
;
2241 *memory_access_type
= VMAT_ELEMENTWISE
;
2246 gcc_assert (!loop_vinfo
|| cmp
> 0);
2247 *memory_access_type
= VMAT_CONTIGUOUS
;
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type
= VMAT_ELEMENTWISE
;
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p
= (gap
!= 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2266 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2267 / vect_get_scalar_dr_size (first_dr_info
)))
2268 would_overrun_p
= false;
2270 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2271 && (can_overrun_p
|| !would_overrun_p
)
2272 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2274 /* First cope with the degenerate case of a single-element
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2279 /* Otherwise try using LOAD/STORE_LANES. */
2280 else if (vls_type
== VLS_LOAD
2281 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2282 : vect_store_lanes_supported (vectype
, group_size
,
2285 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2286 overrun_p
= would_overrun_p
;
2289 /* If that fails, try using permuting loads. */
2290 else if (vls_type
== VLS_LOAD
2291 ? vect_grouped_load_supported (vectype
, single_element_p
,
2293 : vect_grouped_store_supported (vectype
, group_size
))
2295 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2296 overrun_p
= would_overrun_p
;
2300 /* As a last resort, trying using a gather load or scatter store.
2302 ??? Although the code can handle all group sizes correctly,
2303 it probably isn't a win to use separate strided accesses based
2304 on nearby locations. Or, even if it's a win over scalar code,
2305 it might not be a win over vectorizing at a lower VF, if that
2306 allows us to use contiguous accesses. */
2307 if (*memory_access_type
== VMAT_ELEMENTWISE
2310 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2312 *memory_access_type
= VMAT_GATHER_SCATTER
;
2315 if (*memory_access_type
== VMAT_GATHER_SCATTER
2316 || *memory_access_type
== VMAT_ELEMENTWISE
)
2318 *alignment_support_scheme
= dr_unaligned_supported
;
2319 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2323 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2324 *alignment_support_scheme
2325 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2329 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2331 /* STMT is the leader of the group. Check the operands of all the
2332 stmts of the group. */
2333 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2334 while (next_stmt_info
)
2336 tree op
= vect_get_store_rhs (next_stmt_info
);
2337 enum vect_def_type dt
;
2338 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2342 "use not simple.\n");
2345 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2351 gcc_assert (can_overrun_p
);
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2354 "Data access with gaps requires scalar "
2356 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
2377 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2378 tree vectype
, slp_tree slp_node
,
2379 bool masked_p
, vec_load_store_type vls_type
,
2380 unsigned int ncopies
,
2381 vect_memory_access_type
*memory_access_type
,
2382 poly_int64
*poffset
,
2383 dr_alignment_support
*alignment_support_scheme
,
2385 gather_scatter_info
*gs_info
)
2387 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2388 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2389 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2391 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2393 *memory_access_type
= VMAT_GATHER_SCATTER
;
2394 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2396 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2397 &gs_info
->offset_dt
,
2398 &gs_info
->offset_vectype
))
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2402 "%s index use not simple.\n",
2403 vls_type
== VLS_LOAD
? "gather" : "scatter");
2406 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2408 if (vls_type
!= VLS_LOAD
)
2410 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2412 "unsupported emulated scatter.\n");
2415 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2416 || !TYPE_VECTOR_SUBPARTS
2417 (gs_info
->offset_vectype
).is_constant ()
2418 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2419 (gs_info
->offset_vectype
),
2420 TYPE_VECTOR_SUBPARTS (vectype
)))
2422 if (dump_enabled_p ())
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2424 "unsupported vector types for emulated "
2429 /* Gather-scatter accesses perform only component accesses, alignment
2430 is irrelevant for them. */
2431 *alignment_support_scheme
= dr_unaligned_supported
;
2433 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2435 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2437 vls_type
, memory_access_type
, poffset
,
2438 alignment_support_scheme
,
2439 misalignment
, gs_info
))
2442 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2444 gcc_assert (!slp_node
);
2446 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2448 *memory_access_type
= VMAT_GATHER_SCATTER
;
2450 *memory_access_type
= VMAT_ELEMENTWISE
;
2451 /* Alignment is irrelevant here. */
2452 *alignment_support_scheme
= dr_unaligned_supported
;
2456 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2459 gcc_assert (vls_type
== VLS_LOAD
);
2460 *memory_access_type
= VMAT_INVARIANT
;
2461 /* Invariant accesses perform only component accesses, alignment
2462 is irrelevant for them. */
2463 *alignment_support_scheme
= dr_unaligned_supported
;
2468 *memory_access_type
= get_negative_load_store_type
2469 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2471 *memory_access_type
= VMAT_CONTIGUOUS
;
2472 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2474 *alignment_support_scheme
2475 = vect_supportable_dr_alignment (vinfo
,
2476 STMT_VINFO_DR_INFO (stmt_info
),
2477 vectype
, *misalignment
);
2481 if ((*memory_access_type
== VMAT_ELEMENTWISE
2482 || *memory_access_type
== VMAT_STRIDED_SLP
)
2483 && !nunits
.is_constant ())
2485 if (dump_enabled_p ())
2486 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2487 "Not using elementwise accesses due to variable "
2488 "vectorization factor.\n");
2492 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2496 "unsupported unaligned access\n");
2500 /* FIXME: At the moment the cost model seems to underestimate the
2501 cost of using elementwise accesses. This check preserves the
2502 traditional behavior until that can be fixed. */
2503 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2504 if (!first_stmt_info
)
2505 first_stmt_info
= stmt_info
;
2506 if (*memory_access_type
== VMAT_ELEMENTWISE
2507 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2508 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2509 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2510 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2514 "not falling back to elementwise accesses\n");
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2527 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2528 slp_tree slp_node
, unsigned mask_index
,
2529 tree
*mask
, slp_tree
*mask_node
,
2530 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2532 enum vect_def_type mask_dt
;
2534 slp_tree mask_node_1
;
2535 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2536 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2540 "mask use not simple.\n");
2544 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2548 "mask argument is not a boolean.\n");
2552 /* If the caller is not prepared for adjusting an external/constant
2553 SLP mask vector type fail. */
2556 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2558 if (dump_enabled_p ())
2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2560 "SLP mask argument is not vectorized.\n");
2564 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2566 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2568 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2570 if (dump_enabled_p ())
2571 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2572 "could not find an appropriate vector mask type.\n");
2576 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2577 TYPE_VECTOR_SUBPARTS (vectype
)))
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2581 "vector mask type %T"
2582 " does not match vector data type %T.\n",
2583 mask_vectype
, vectype
);
2588 *mask_dt_out
= mask_dt
;
2589 *mask_vectype_out
= mask_vectype
;
2591 *mask_node
= mask_node_1
;
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2601 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2602 slp_tree slp_node
, tree rhs
,
2603 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2604 vec_load_store_type
*vls_type_out
)
2606 /* In the case this is a store from a constant make sure
2607 native_encode_expr can handle it. */
2608 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2612 "cannot encode constant as a byte sequence.\n");
2617 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2619 if (gimple_call_internal_p (call
)
2620 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2621 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2624 enum vect_def_type rhs_dt
;
2627 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2628 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2630 if (dump_enabled_p ())
2631 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2632 "use not simple.\n");
2636 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2637 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2641 "incompatible vector types.\n");
2645 *rhs_dt_out
= rhs_dt
;
2646 *rhs_vectype_out
= rhs_vectype
;
2647 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2648 *vls_type_out
= VLS_STORE_INVARIANT
;
2650 *vls_type_out
= VLS_STORE
;
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */
static tree
vect_build_all_ones_mask (vec_info *vinfo,
                          stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */
static tree
vect_build_zero_merge_argument (vec_info *vinfo,
                                stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2716 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2717 gimple_stmt_iterator
*gsi
,
2719 gather_scatter_info
*gs_info
,
2722 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2723 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2724 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2725 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2726 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2727 edge pe
= loop_preheader_edge (loop
);
2728 enum { NARROW
, NONE
, WIDEN
} modifier
;
2729 poly_uint64 gather_off_nunits
2730 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2732 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2733 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2734 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2735 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2736 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2737 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2738 tree scaletype
= TREE_VALUE (arglist
);
2739 tree real_masktype
= masktype
;
2740 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2742 || TREE_CODE (masktype
) == INTEGER_TYPE
2743 || types_compatible_p (srctype
, masktype
)));
2744 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2745 masktype
= truth_type_for (srctype
);
2747 tree mask_halftype
= masktype
;
2748 tree perm_mask
= NULL_TREE
;
2749 tree mask_perm_mask
= NULL_TREE
;
2750 if (known_eq (nunits
, gather_off_nunits
))
2752 else if (known_eq (nunits
* 2, gather_off_nunits
))
2756 /* Currently widening gathers and scatters are only supported for
2757 fixed-length vectors. */
2758 int count
= gather_off_nunits
.to_constant ();
2759 vec_perm_builder
sel (count
, count
, 1);
2760 for (int i
= 0; i
< count
; ++i
)
2761 sel
.quick_push (i
| (count
/ 2));
2763 vec_perm_indices
indices (sel
, 1, count
);
2764 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2767 else if (known_eq (nunits
, gather_off_nunits
* 2))
2771 /* Currently narrowing gathers and scatters are only supported for
2772 fixed-length vectors. */
2773 int count
= nunits
.to_constant ();
2774 vec_perm_builder
sel (count
, count
, 1);
2775 sel
.quick_grow (count
);
2776 for (int i
= 0; i
< count
; ++i
)
2777 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2778 vec_perm_indices
indices (sel
, 2, count
);
2779 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2783 if (mask
&& masktype
== real_masktype
)
2785 for (int i
= 0; i
< count
; ++i
)
2786 sel
[i
] = i
| (count
/ 2);
2787 indices
.new_vector (sel
, 2, count
);
2788 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2791 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2796 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2797 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2799 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2800 if (!is_gimple_min_invariant (ptr
))
2803 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2804 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2805 gcc_assert (!new_bb
);
2808 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2810 tree vec_oprnd0
= NULL_TREE
;
2811 tree vec_mask
= NULL_TREE
;
2812 tree src_op
= NULL_TREE
;
2813 tree mask_op
= NULL_TREE
;
2814 tree prev_res
= NULL_TREE
;
2818 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2819 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2822 auto_vec
<tree
> vec_oprnds0
;
2823 auto_vec
<tree
> vec_masks
;
2824 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2825 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2826 gs_info
->offset
, &vec_oprnds0
);
2828 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2829 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2830 mask
, &vec_masks
, masktype
);
2831 for (int j
= 0; j
< ncopies
; ++j
)
2834 if (modifier
== WIDEN
&& (j
& 1))
2835 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2836 perm_mask
, stmt_info
, gsi
);
2838 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2840 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2842 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2843 TYPE_VECTOR_SUBPARTS (idxtype
)));
2844 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2845 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2846 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2847 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2853 if (mask_perm_mask
&& (j
& 1))
2854 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2855 mask_perm_mask
, stmt_info
, gsi
);
2858 if (modifier
== NARROW
)
2861 vec_mask
= vec_masks
[j
/ 2];
2864 vec_mask
= vec_masks
[j
];
2867 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2869 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2870 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2871 gcc_assert (known_eq (sub1
, sub2
));
2872 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2873 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2875 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2876 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2880 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2882 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2884 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2885 : VEC_UNPACK_LO_EXPR
,
2887 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2893 tree mask_arg
= mask_op
;
2894 if (masktype
!= real_masktype
)
2896 tree utype
, optype
= TREE_TYPE (mask_op
);
2897 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2898 utype
= real_masktype
;
2900 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2901 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2902 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2904 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2905 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2907 if (!useless_type_conversion_p (real_masktype
, utype
))
2909 gcc_assert (TYPE_PRECISION (utype
)
2910 <= TYPE_PRECISION (real_masktype
));
2911 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2912 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2913 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2916 src_op
= build_zero_cst (srctype
);
2918 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2921 if (!useless_type_conversion_p (vectype
, rettype
))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2924 TYPE_VECTOR_SUBPARTS (rettype
)));
2925 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2926 gimple_call_set_lhs (new_stmt
, op
);
2927 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2928 var
= make_ssa_name (vec_dest
);
2929 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2930 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2931 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2935 var
= make_ssa_name (vec_dest
, new_stmt
);
2936 gimple_call_set_lhs (new_stmt
, var
);
2937 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2940 if (modifier
== NARROW
)
2947 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2949 new_stmt
= SSA_NAME_DEF_STMT (var
);
2952 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2954 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
static void
vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
                             class loop *loop, stmt_vec_info stmt_info,
                             gather_scatter_info *gs_info,
                             tree *dataref_ptr, vec<tree> *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
  vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
                                 gs_info->offset, vec_offset,
                                 gs_info->offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
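/* Illustrative example: with DR_STEP == 8 and SCALE == 4, X is 2, so
   *VEC_OFFSET becomes { 0, 2, 4, 6, ... } while *DATAREF_BUMP advances
   the base address by 8 * nunits bytes per copy.  */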
static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
                                 loop_vec_info loop_vinfo,
                                 gather_scatter_info *gs_info,
                                 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  tree bump = size_binop (MULT_EXPR,
                          fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
                          ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);

  /* Create {0, X, X*2, X*3, ...}.  */
  tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
                             build_zero_cst (offset_type), step);
  *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
}
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
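/* For instance, if AGGR_TYPE is a 16-byte vector type the increment is 16,
   and it is negated to -16 when the scalar step is negative so that the
   pointer walks backwards through memory.  */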
static tree
vect_get_data_ptr_increment (vec_info *vinfo,
                             dr_vec_info *dr_info, tree aggr_type,
                             vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (vinfo, dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
3045 vectorizable_bswap (vec_info
*vinfo
,
3046 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3047 gimple
**vec_stmt
, slp_tree slp_node
,
3049 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3052 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3053 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3056 op
= gimple_call_arg (stmt
, 0);
3057 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3058 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3060 /* Multiple types in SLP are handled by creating the appropriate number of
3061 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3066 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3068 gcc_assert (ncopies
>= 1);
3070 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3074 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3075 unsigned word_bytes
;
3076 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3079 /* The encoding uses one stepped pattern for each byte in the word. */
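/* E.g. for 4-byte elements in a 16-byte vector the selector built below is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, which reverses
   the bytes within each element.  */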
3080 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3081 for (unsigned i
= 0; i
< 3; ++i
)
3082 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3083 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3085 vec_perm_indices
indices (elts
, 1, num_bytes
);
3086 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3092 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3094 if (dump_enabled_p ())
3095 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3096 "incompatible vector types for invariants\n");
3100 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3101 DUMP_VECT_SCOPE ("vectorizable_bswap");
3102 record_stmt_cost (cost_vec
,
3103 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3104 record_stmt_cost (cost_vec
,
3106 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3107 vec_perm
, stmt_info
, 0, vect_body
);
3111 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3114 vec
<tree
> vec_oprnds
= vNULL
;
3115 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3117 /* Arguments are ready. create the new vector stmt. */
3120 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3123 tree tem
= make_ssa_name (char_vectype
);
3124 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3125 char_vectype
, vop
));
3126 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3127 tree tem2
= make_ssa_name (char_vectype
);
3128 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3129 tem
, tem
, bswap_vconst
);
3130 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3131 tem
= make_ssa_name (vectype
);
3132 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3134 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3136 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3138 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3142 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3144 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
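/* Illustrative example: narrowing VECTYPE_IN V4SI to VECTYPE_OUT V8HI
   succeeds in a single step on targets providing vec_pack_trunc_v4si,
   and *CONVERT_CODE is then VEC_PACK_TRUNC_EXPR.  */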
static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3181 vectorizable_call (vec_info
*vinfo
,
3182 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3183 gimple
**vec_stmt
, slp_tree slp_node
,
3184 stmt_vector_for_cost
*cost_vec
)
3190 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3191 tree vectype_out
, vectype_in
;
3192 poly_uint64 nunits_in
;
3193 poly_uint64 nunits_out
;
3194 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3195 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3196 tree fndecl
, new_temp
, rhs_type
;
3197 enum vect_def_type dt
[4]
3198 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3199 vect_unknown_def_type
};
3200 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3201 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3202 int ndts
= ARRAY_SIZE (dt
);
3204 auto_vec
<tree
, 8> vargs
;
3205 auto_vec
<tree
, 8> orig_vargs
;
3206 enum { NARROW
, NONE
, WIDEN
} modifier
;
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3213 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3217 /* Is STMT_INFO a vectorizable call? */
3218 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3222 if (gimple_call_internal_p (stmt
)
3223 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3224 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3225 /* Handled by vectorizable_load and vectorizable_store. */
3228 if (gimple_call_lhs (stmt
) == NULL_TREE
3229 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3232 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3234 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3236 /* Process function arguments. */
3237 rhs_type
= NULL_TREE
;
3238 vectype_in
= NULL_TREE
;
3239 nargs
= gimple_call_num_args (stmt
);
3241 /* Bail out if the function has more than four arguments, we do not have
3242 interesting builtin functions to vectorize with more than two arguments
3243 except for fma. No arguments is also not good. */
3244 if (nargs
== 0 || nargs
> 4)
3247 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3248 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3249 if (cfn
== CFN_GOMP_SIMD_LANE
)
3252 rhs_type
= unsigned_type_node
;
3256 if (internal_fn_p (cfn
))
3257 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3259 for (i
= 0; i
< nargs
; i
++)
3261 if ((int) i
== mask_opno
)
3263 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3264 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3269 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3270 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3272 if (dump_enabled_p ())
3273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3274 "use not simple.\n");
3278 /* We can only handle calls with arguments of the same type. */
3280 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3282 if (dump_enabled_p ())
3283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3284 "argument types differ.\n");
3288 rhs_type
= TREE_TYPE (op
);
3291 vectype_in
= vectypes
[i
];
3292 else if (vectypes
[i
]
3293 && !types_compatible_p (vectypes
[i
], vectype_in
))
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3297 "argument vector types differ.\n");
3301 /* If all arguments are external or constant defs, infer the vector type
3302 from the scalar type. */
3304 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3306 gcc_assert (vectype_in
);
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3311 "no vectype for scalar type %T\n", rhs_type
);
3315 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3316 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3317 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3318 by a pack of the two vectors into an SI vector. We would need
3319 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3320 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3324 "mismatched vector sizes %T and %T\n",
3325 vectype_in
, vectype_out
);
3329 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3330 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3334 "mixed mask and nonmask vector types\n");
3339 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3340 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3341 if (known_eq (nunits_in
* 2, nunits_out
))
3343 else if (known_eq (nunits_out
, nunits_in
))
3345 else if (known_eq (nunits_out
* 2, nunits_in
))
3350 /* We only handle functions that do not read or clobber memory. */
3351 if (gimple_vuse (stmt
))
3353 if (dump_enabled_p ())
3354 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3355 "function reads from or writes to memory.\n");
3359 /* For now, we only vectorize functions if a target specific builtin
3360 is available. TODO -- in some cases, it might be profitable to
3361 insert the calls for pieces of the vector, in order to be able
3362 to vectorize other operations in the loop. */
3364 internal_fn ifn
= IFN_LAST
;
3365 tree callee
= gimple_call_fndecl (stmt
);
3367 /* First try using an internal function. */
3368 tree_code convert_code
= ERROR_MARK
;
3370 && (modifier
== NONE
3371 || (modifier
== NARROW
3372 && simple_integer_narrowing (vectype_out
, vectype_in
,
3374 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3377 /* If that fails, try asking for a target-specific built-in function. */
3378 if (ifn
== IFN_LAST
)
3380 if (cfn
!= CFN_LAST
)
3381 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3382 (cfn
, vectype_out
, vectype_in
);
3383 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3384 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3385 (callee
, vectype_out
, vectype_in
);
3388 if (ifn
== IFN_LAST
&& !fndecl
)
3390 if (cfn
== CFN_GOMP_SIMD_LANE
3393 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3394 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3395 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3396 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3398 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3399 { 0, 1, 2, ... vf - 1 } vector. */
3400 gcc_assert (nargs
== 0);
3402 else if (modifier
== NONE
3403 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3404 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3405 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3406 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3407 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3408 slp_op
, vectype_in
, cost_vec
);
3411 if (dump_enabled_p ())
3412 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3413 "function is not vectorizable.\n");
3420 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3421 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3423 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3425 /* Sanity check: make sure that at least one copy of the vectorized stmt
3426 needs to be generated. */
3427 gcc_assert (ncopies
>= 1);
3429 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3430 if (!vec_stmt
) /* transformation not required. */
3433 for (i
= 0; i
< nargs
; ++i
)
3434 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3436 if (dump_enabled_p ())
3437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3438 "incompatible vector types for invariants\n");
3441 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3442 DUMP_VECT_SCOPE ("vectorizable_call");
3443 vect_model_simple_cost (vinfo
, stmt_info
,
3444 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3445 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3446 record_stmt_cost (cost_vec
, ncopies
/ 2,
3447 vec_promote_demote
, stmt_info
, 0, vect_body
);
3449 if (loop_vinfo
&& mask_opno
>= 0)
3451 unsigned int nvectors
= (slp_node
3452 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3454 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3455 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3456 vectype_out
, scalar_mask
);
3463 if (dump_enabled_p ())
3464 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3467 scalar_dest
= gimple_call_lhs (stmt
);
3468 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3470 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3472 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3474 tree prev_res
= NULL_TREE
;
3475 vargs
.safe_grow (nargs
, true);
3476 orig_vargs
.safe_grow (nargs
, true);
3477 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3478 for (j
= 0; j
< ncopies
; ++j
)
3480 /* Build argument list for the vectorized call. */
3483 vec
<tree
> vec_oprnds0
;
3485 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3486 vec_oprnds0
= vec_defs
[0];
3488 /* Arguments are ready. Create the new vector stmt. */
3489 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3492 for (k
= 0; k
< nargs
; k
++)
3494 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3495 vargs
[k
] = vec_oprndsk
[i
];
3498 if (modifier
== NARROW
)
3500 /* We don't define any narrowing conditional functions
3502 gcc_assert (mask_opno
< 0);
3503 tree half_res
= make_ssa_name (vectype_in
);
3505 = gimple_build_call_internal_vec (ifn
, vargs
);
3506 gimple_call_set_lhs (call
, half_res
);
3507 gimple_call_set_nothrow (call
, true);
3508 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3511 prev_res
= half_res
;
3514 new_temp
= make_ssa_name (vec_dest
);
3515 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3516 prev_res
, half_res
);
3517 vect_finish_stmt_generation (vinfo
, stmt_info
,
3522 if (mask_opno
>= 0 && masked_loop_p
)
3524 unsigned int vec_num
= vec_oprnds0
.length ();
3525 /* Always true for SLP. */
3526 gcc_assert (ncopies
== 1);
3527 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3529 vargs
[mask_opno
] = prepare_load_store_mask
3530 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3534 if (ifn
!= IFN_LAST
)
3535 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3537 call
= gimple_build_call_vec (fndecl
, vargs
);
3538 new_temp
= make_ssa_name (vec_dest
, call
);
3539 gimple_call_set_lhs (call
, new_temp
);
3540 gimple_call_set_nothrow (call
, true);
3541 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3544 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3549 for (i
= 0; i
< nargs
; i
++)
3551 op
= gimple_call_arg (stmt
, i
);
3554 vec_defs
.quick_push (vNULL
);
3555 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3559 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3562 if (mask_opno
>= 0 && masked_loop_p
)
3564 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3567 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3568 vargs
[mask_opno
], gsi
);
3572 if (cfn
== CFN_GOMP_SIMD_LANE
)
3574 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3576 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3577 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3578 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3579 new_temp
= make_ssa_name (vec_dest
);
3580 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3581 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3583 else if (modifier
== NARROW
)
3585 /* We don't define any narrowing conditional functions at
3587 gcc_assert (mask_opno
< 0);
3588 tree half_res
= make_ssa_name (vectype_in
);
3589 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3590 gimple_call_set_lhs (call
, half_res
);
3591 gimple_call_set_nothrow (call
, true);
3592 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3595 prev_res
= half_res
;
3598 new_temp
= make_ssa_name (vec_dest
);
3599 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3600 prev_res
, half_res
);
3601 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3606 if (ifn
!= IFN_LAST
)
3607 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3609 call
= gimple_build_call_vec (fndecl
, vargs
);
3610 new_temp
= make_ssa_name (vec_dest
, call
);
3611 gimple_call_set_lhs (call
, new_temp
);
3612 gimple_call_set_nothrow (call
, true);
3613 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3617 if (j
== (modifier
== NARROW
? 1 : 0))
3618 *vec_stmt
= new_stmt
;
3619 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3621 for (i
= 0; i
< nargs
; i
++)
3623 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3624 vec_oprndsi
.release ();
3627 else if (modifier
== NARROW
)
3629 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3630 /* We don't define any narrowing conditional functions at present. */
3631 gcc_assert (mask_opno
< 0);
3632 for (j
= 0; j
< ncopies
; ++j
)
3634 /* Build argument list for the vectorized call. */
3636 vargs
.create (nargs
* 2);
3642 vec
<tree
> vec_oprnds0
;
3644 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3645 vec_oprnds0
= vec_defs
[0];
3647 /* Arguments are ready. Create the new vector stmt. */
3648 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3652 for (k
= 0; k
< nargs
; k
++)
3654 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3655 vargs
.quick_push (vec_oprndsk
[i
]);
3656 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3659 if (ifn
!= IFN_LAST
)
3660 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3662 call
= gimple_build_call_vec (fndecl
, vargs
);
3663 new_temp
= make_ssa_name (vec_dest
, call
);
3664 gimple_call_set_lhs (call
, new_temp
);
3665 gimple_call_set_nothrow (call
, true);
3666 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3667 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_defs.quick_push (vNULL);
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
						 op, &vec_defs[i], vectypes[i]);
		}
	      vec_oprnd0 = vec_defs[i][2 * j];
	      vec_oprnd1 = vec_defs[i][2 * j + 1];

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}

      if (!slp_node)
	*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
      for (i = 0; i < nargs; i++)
	{
	  vec<tree> vec_oprndsi = vec_defs[i];
	  vec_oprndsi.release ();
	}
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	break;
      else
	return;
    }

  if (!linear_step)
    linear_step = 1;
  arginfo->linear_step = linear_step;
  arginfo->op = base;
  arginfo->simd_lane_linear = true;
}
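
/* For instance, after OpenMP simd lowering an addressable lane-private
   variable is replaced by an array indexed by the current simd lane, so a
   use of its address looks roughly like

     _1 = .GOMP_SIMD_LANE (simduid);
     _2 = &D.priv[_1];
     bar (_2);

   Such an address is an invariant base plus lane times a constant step: it
   is not a simple induction over the whole loop, but it is linear within
   one simd lane, which is what the helper above records in *ARGINFO.  */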
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
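
/* As a rough illustration, given a user-supplied SIMD variant

     #pragma omp declare simd simdlen(4) notinbranch
     float foo (float x);

     for (i = 0; i < n; i++)
       a[i] = foo (b[i]);

   and a vectorization factor of 4, the scalar call is replaced by a single
   call to the matching clone operating on a whole vector of arguments,
   roughly

     vect_b = <load 4 floats from &b[i]>;
     vect_a = <call to the vector clone of foo> (vect_b);
     <store vect_a to &a[i]>;

   The code below picks the best clone from node->simd_clones and builds
   the argument list according to each parameter's kind (vector, uniform,
   linear or mask).  */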
static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_oprnd0 = NULL_TREE;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;
  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    return false;
3884 /* Process function arguments. */
3885 nargs
= gimple_call_num_args (stmt
);
3887 /* Bail out if the function has zero arguments. */
3891 arginfo
.reserve (nargs
, true);
3893 for (i
= 0; i
< nargs
; i
++)
3895 simd_call_arg_info thisarginfo
;
3898 thisarginfo
.linear_step
= 0;
3899 thisarginfo
.align
= 0;
3900 thisarginfo
.op
= NULL_TREE
;
3901 thisarginfo
.simd_lane_linear
= false;
3903 op
= gimple_call_arg (stmt
, i
);
3904 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3905 &thisarginfo
.vectype
)
3906 || thisarginfo
.dt
== vect_uninitialized_def
)
3908 if (dump_enabled_p ())
3909 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3910 "use not simple.\n");
3914 if (thisarginfo
.dt
== vect_constant_def
3915 || thisarginfo
.dt
== vect_external_def
)
3916 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3919 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3920 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3922 if (dump_enabled_p ())
3923 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3924 "vector mask arguments are not supported\n");
3929 /* For linear arguments, the analyze phase should have saved
3930 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3931 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3932 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3934 gcc_assert (vec_stmt
);
3935 thisarginfo
.linear_step
3936 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3938 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3939 thisarginfo
.simd_lane_linear
3940 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3941 == boolean_true_node
);
3942 /* If loop has been peeled for alignment, we need to adjust it. */
3943 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3944 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3945 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3947 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3948 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3949 tree opt
= TREE_TYPE (thisarginfo
.op
);
3950 bias
= fold_convert (TREE_TYPE (step
), bias
);
3951 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3953 = fold_build2 (POINTER_TYPE_P (opt
)
3954 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3955 thisarginfo
.op
, bias
);
3959 && thisarginfo
.dt
!= vect_constant_def
3960 && thisarginfo
.dt
!= vect_external_def
3962 && TREE_CODE (op
) == SSA_NAME
3963 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3965 && tree_fits_shwi_p (iv
.step
))
3967 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3968 thisarginfo
.op
= iv
.base
;
3970 else if ((thisarginfo
.dt
== vect_constant_def
3971 || thisarginfo
.dt
== vect_external_def
)
3972 && POINTER_TYPE_P (TREE_TYPE (op
)))
3973 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3974 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3976 if (POINTER_TYPE_P (TREE_TYPE (op
))
3977 && !thisarginfo
.linear_step
3979 && thisarginfo
.dt
!= vect_constant_def
3980 && thisarginfo
.dt
!= vect_external_def
3983 && TREE_CODE (op
) == SSA_NAME
)
3984 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3986 arginfo
.quick_push (thisarginfo
);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (!vf.is_constant ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");
      return false;
    }
3999 unsigned int badness
= 0;
4000 struct cgraph_node
*bestn
= NULL
;
4001 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
4002 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
4004 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4005 n
= n
->simdclone
->next_clone
)
4007 unsigned int this_badness
= 0;
4008 unsigned int num_calls
;
4009 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4010 || n
->simdclone
->nargs
!= nargs
)
4013 this_badness
+= exact_log2 (num_calls
) * 4096;
4014 if (n
->simdclone
->inbranch
)
4015 this_badness
+= 8192;
4016 int target_badness
= targetm
.simd_clone
.usable (n
);
4017 if (target_badness
< 0)
4019 this_badness
+= target_badness
* 512;
4020 /* FORNOW: Have to add code to add the mask argument. */
4021 if (n
->simdclone
->inbranch
)
4023 for (i
= 0; i
< nargs
; i
++)
4025 switch (n
->simdclone
->args
[i
].arg_type
)
4027 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4028 if (!useless_type_conversion_p
4029 (n
->simdclone
->args
[i
].orig_type
,
4030 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4032 else if (arginfo
[i
].dt
== vect_constant_def
4033 || arginfo
[i
].dt
== vect_external_def
4034 || arginfo
[i
].linear_step
)
4037 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4038 if (arginfo
[i
].dt
!= vect_constant_def
4039 && arginfo
[i
].dt
!= vect_external_def
)
4042 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4043 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4044 if (arginfo
[i
].dt
== vect_constant_def
4045 || arginfo
[i
].dt
== vect_external_def
4046 || (arginfo
[i
].linear_step
4047 != n
->simdclone
->args
[i
].linear_step
))
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4059 case SIMD_CLONE_ARG_TYPE_MASK
:
4062 if (i
== (size_t) -1)
4064 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4069 if (arginfo
[i
].align
)
4070 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4071 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4073 if (i
== (size_t) -1)
4075 if (bestn
== NULL
|| this_badness
< badness
)
4078 badness
= this_badness
;
4085 for (i
= 0; i
< nargs
; i
++)
4086 if ((arginfo
[i
].dt
== vect_constant_def
4087 || arginfo
[i
].dt
== vect_external_def
)
4088 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4090 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4091 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4093 if (arginfo
[i
].vectype
== NULL
4094 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4095 simd_clone_subparts (arginfo
[i
].vectype
)))
4099 fndecl
= bestn
->decl
;
4100 nunits
= bestn
->simdclone
->simdlen
;
4101 ncopies
= vector_unroll_factor (vf
, nunits
);
4103 /* If the function isn't const, only allow it in simd loops where user
4104 has asserted that at least nunits consecutive iterations can be
4105 performed using SIMD instructions. */
4106 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4107 && gimple_vuse (stmt
))
4110 /* Sanity check: make sure that at least one copy of the vectorized stmt
4111 needs to be generated. */
4112 gcc_assert (ncopies
>= 1);
4114 if (!vec_stmt
) /* transformation not required. */
4116 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4117 for (i
= 0; i
< nargs
; i
++)
4118 if ((bestn
->simdclone
->args
[i
].arg_type
4119 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4120 || (bestn
->simdclone
->args
[i
].arg_type
4121 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4123 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4127 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4128 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4129 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4131 tree sll
= arginfo
[i
].simd_lane_linear
4132 ? boolean_true_node
: boolean_false_node
;
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4135 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4136 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4137 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4138 dt, slp_node, cost_vec); */
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4148 scalar_dest
= gimple_call_lhs (stmt
);
4149 vec_dest
= NULL_TREE
;
4154 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4155 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4156 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4159 rtype
= TREE_TYPE (ratype
);
4163 auto_vec
<vec
<tree
> > vec_oprnds
;
4164 auto_vec
<unsigned> vec_oprnds_i
;
4165 vec_oprnds
.safe_grow_cleared (nargs
, true);
4166 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4167 for (j
= 0; j
< ncopies
; ++j
)
4169 /* Build argument list for the vectorized call. */
4171 vargs
.create (nargs
);
4175 for (i
= 0; i
< nargs
; i
++)
4177 unsigned int k
, l
, m
, o
;
4179 op
= gimple_call_arg (stmt
, i
);
4180 switch (bestn
->simdclone
->args
[i
].arg_type
)
4182 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4183 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4184 o
= vector_unroll_factor (nunits
,
4185 simd_clone_subparts (atype
));
4186 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4188 if (simd_clone_subparts (atype
)
4189 < simd_clone_subparts (arginfo
[i
].vectype
))
4191 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4192 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4193 / simd_clone_subparts (atype
));
4194 gcc_assert ((k
& (k
- 1)) == 0);
4197 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4198 ncopies
* o
/ k
, op
,
4200 vec_oprnds_i
[i
] = 0;
4201 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4205 vec_oprnd0
= arginfo
[i
].op
;
4206 if ((m
& (k
- 1)) == 0)
4207 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4209 arginfo
[i
].op
= vec_oprnd0
;
4211 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4213 bitsize_int ((m
& (k
- 1)) * prec
));
4215 = gimple_build_assign (make_ssa_name (atype
),
4217 vect_finish_stmt_generation (vinfo
, stmt_info
,
4219 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4223 k
= (simd_clone_subparts (atype
)
4224 / simd_clone_subparts (arginfo
[i
].vectype
));
4225 gcc_assert ((k
& (k
- 1)) == 0);
4226 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4228 vec_alloc (ctor_elts
, k
);
4231 for (l
= 0; l
< k
; l
++)
4233 if (m
== 0 && l
== 0)
4235 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4239 vec_oprnds_i
[i
] = 0;
4240 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4243 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4244 arginfo
[i
].op
= vec_oprnd0
;
4247 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4251 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4255 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4257 = gimple_build_assign (make_ssa_name (atype
),
4259 vect_finish_stmt_generation (vinfo
, stmt_info
,
4261 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4264 vargs
.safe_push (vec_oprnd0
);
4267 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4269 = gimple_build_assign (make_ssa_name (atype
),
4271 vect_finish_stmt_generation (vinfo
, stmt_info
,
4273 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4278 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4279 vargs
.safe_push (op
);
4281 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4282 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4287 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4288 &stmts
, true, NULL_TREE
);
4292 edge pe
= loop_preheader_edge (loop
);
4293 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4294 gcc_assert (!new_bb
);
4296 if (arginfo
[i
].simd_lane_linear
)
4298 vargs
.safe_push (arginfo
[i
].op
);
4301 tree phi_res
= copy_ssa_name (op
);
4302 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4303 add_phi_arg (new_phi
, arginfo
[i
].op
,
4304 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4306 = POINTER_TYPE_P (TREE_TYPE (op
))
4307 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4308 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4309 ? sizetype
: TREE_TYPE (op
);
4311 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4313 tree tcst
= wide_int_to_tree (type
, cst
);
4314 tree phi_arg
= copy_ssa_name (op
);
4316 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4317 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4318 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4319 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4321 arginfo
[i
].op
= phi_res
;
4322 vargs
.safe_push (phi_res
);
4327 = POINTER_TYPE_P (TREE_TYPE (op
))
4328 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4329 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4330 ? sizetype
: TREE_TYPE (op
);
4332 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4334 tree tcst
= wide_int_to_tree (type
, cst
);
4335 new_temp
= make_ssa_name (TREE_TYPE (op
));
4337 = gimple_build_assign (new_temp
, code
,
4338 arginfo
[i
].op
, tcst
);
4339 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4340 vargs
.safe_push (new_temp
);
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4344 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4345 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4346 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4347 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4348 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4354 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4358 || known_eq (simd_clone_subparts (rtype
), nunits
));
4360 new_temp
= create_tmp_var (ratype
);
4361 else if (useless_type_conversion_p (vectype
, rtype
))
4362 new_temp
= make_ssa_name (vec_dest
, new_call
);
4364 new_temp
= make_ssa_name (rtype
, new_call
);
4365 gimple_call_set_lhs (new_call
, new_temp
);
4367 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4368 gimple
*new_stmt
= new_call
;
4372 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4375 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4376 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4377 k
= vector_unroll_factor (nunits
,
4378 simd_clone_subparts (vectype
));
4379 gcc_assert ((k
& (k
- 1)) == 0);
4380 for (l
= 0; l
< k
; l
++)
4385 t
= build_fold_addr_expr (new_temp
);
4386 t
= build2 (MEM_REF
, vectype
, t
,
4387 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4390 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4391 bitsize_int (prec
), bitsize_int (l
* prec
));
4392 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4393 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4395 if (j
== 0 && l
== 0)
4396 *vec_stmt
= new_stmt
;
4397 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4401 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4404 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4406 unsigned int k
= (simd_clone_subparts (vectype
)
4407 / simd_clone_subparts (rtype
));
4408 gcc_assert ((k
& (k
- 1)) == 0);
4409 if ((j
& (k
- 1)) == 0)
4410 vec_alloc (ret_ctor_elts
, k
);
4414 o
= vector_unroll_factor (nunits
,
4415 simd_clone_subparts (rtype
));
4416 for (m
= 0; m
< o
; m
++)
4418 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4419 size_int (m
), NULL_TREE
, NULL_TREE
);
4420 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4422 vect_finish_stmt_generation (vinfo
, stmt_info
,
4424 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4425 gimple_assign_lhs (new_stmt
));
4427 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4430 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4431 if ((j
& (k
- 1)) != k
- 1)
4433 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4435 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4436 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4438 if ((unsigned) j
== k
- 1)
4439 *vec_stmt
= new_stmt
;
4440 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4445 tree t
= build_fold_addr_expr (new_temp
);
4446 t
= build2 (MEM_REF
, vectype
, t
,
4447 build_int_cst (TREE_TYPE (t
), 0));
4448 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4449 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4450 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4452 else if (!useless_type_conversion_p (vectype
, rtype
))
4454 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4456 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4457 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4462 *vec_stmt
= new_stmt
;
4463 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4466 for (i
= 0; i
< nargs
; ++i
)
4468 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4473 /* The call in STMT might prevent it from being removed in dce.
4474 We however cannot remove it here, due to the way the ssa name
4475 it defines is mapped to the new definition. So just replace
4476 rhs of the statement with something harmless. */
4484 type
= TREE_TYPE (scalar_dest
);
4485 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4486 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4489 new_stmt
= gimple_build_nop ();
4490 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4491 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
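
/* For example, widening a multiplication of two V8HI vectors into V4SI
   products takes two statements, one per half of the elements, roughly

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_a, vect_b>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_a, vect_b>;

   (which half holds which elements depends on the target's endianness).
   Callers invoke this helper twice, once with CODE1 and once with CODE2,
   to obtain the two halves.  */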
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i + 1) / 2);
      vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
					     multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR);
    }

  vec_dsts.quick_push (vec_dest);
}
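
/* For example, demoting int elements to char on a 128-bit target is a
   two-step conversion: four V4SI inputs are packed pairwise into two V8HI
   vectors, which are packed again into one V16QI vector, roughly

     tmp0 = VEC_PACK_TRUNC_EXPR <vi0, vi1>;
     tmp1 = VEC_PACK_TRUNC_EXPR <vi2, vi3>;
     res  = VEC_PACK_TRUNC_EXPR <tmp0, tmp1>;

   matching the recursion above, which halves the number of operands at
   each level until the destination type is reached.  */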
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
						 op_type, vec_dest, gsi,
						 stmt_info);
      if (is_gimple_call (new_stmt1))
	{
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	}
      else
	{
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);
	}

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */

static void
vect_create_half_widening_stmts (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1,
				 stmt_vec_info stmt_info, tree vec_dest,
				 gimple_stmt_iterator *gsi,
				 enum tree_code code1,
				 int op_type)
{
  int i;
  tree vop0, vop1;
  gimple *new_stmt1;
  gimple *new_stmt2;
  gimple *new_stmt3;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length ());
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      tree new_tmp1, new_tmp2, new_tmp3, out_type;

      gcc_assert (op_type == binary_op);
      vop1 = (*vec_oprnds1)[i];

      /* Widen the first vector input.  */
      out_type = TREE_TYPE (vec_dest);
      new_tmp1 = make_ssa_name (out_type);
      new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
      if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
	{
	  /* Widen the second vector input.  */
	  new_tmp2 = make_ssa_name (out_type);
	  new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
	  /* Perform the operation.  With both vector inputs widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
	}
      else
	{
	  /* Perform the operation.  With the single vector input widened.  */
	  new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
	}

      new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
      gimple_assign_set_lhs (new_stmt3, new_tmp3);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp3);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
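
/* This path is used when the input vector already has as many elements as
   the widened result, e.g. for

     int a[N]; short b[N], c[N];
     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   with V4SI output and V4HI inputs: rather than unpacking the inputs into
   hi/lo halves, each input vector is simply converted (NOP_EXPR) to the
   wider vector type and the operation is carried out on the widened
   operands.  */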
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
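
/* Typical cases handled here include the widening conversion

     short b[N]; int a[N];
     for (i = 0; i < N; i++)
       a[i] = (int) b[i];

   which produces two vector results per input vector (via
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR), and the reverse narrowing
   conversion, which packs two input vectors into one result (via
   VEC_PACK_TRUNC_EXPR); the NONE/WIDEN/NARROW modifier computed below
   distinguishes these shapes.  */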
static bool
vectorizable_conversion (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
4715 tree op0
, op1
= NULL_TREE
;
4716 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4717 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4718 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4720 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4722 poly_uint64 nunits_in
;
4723 poly_uint64 nunits_out
;
4724 tree vectype_out
, vectype_in
;
4726 tree lhs_type
, rhs_type
;
4727 enum { NARROW
, NONE
, WIDEN
} modifier
;
4728 vec
<tree
> vec_oprnds0
= vNULL
;
4729 vec
<tree
> vec_oprnds1
= vNULL
;
4731 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4732 int multi_step_cvt
= 0;
4733 vec
<tree
> interm_types
= vNULL
;
4734 tree intermediate_type
, cvt_type
= NULL_TREE
;
4736 unsigned short fltsz
;
4738 /* Is STMT a vectorizable conversion? */
4740 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4743 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4747 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4751 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4754 code
= gimple_assign_rhs_code (stmt
);
4755 if (!CONVERT_EXPR_CODE_P (code
)
4756 && code
!= FIX_TRUNC_EXPR
4757 && code
!= FLOAT_EXPR
4758 && code
!= WIDEN_PLUS_EXPR
4759 && code
!= WIDEN_MINUS_EXPR
4760 && code
!= WIDEN_MULT_EXPR
4761 && code
!= WIDEN_LSHIFT_EXPR
)
4764 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4765 || code
== WIDEN_MINUS_EXPR
4766 || code
== WIDEN_MULT_EXPR
4767 || code
== WIDEN_LSHIFT_EXPR
);
4768 op_type
= TREE_CODE_LENGTH (code
);
4770 /* Check types of lhs and rhs. */
4771 scalar_dest
= gimple_assign_lhs (stmt
);
4772 lhs_type
= TREE_TYPE (scalar_dest
);
4773 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4775 /* Check the operands of the operation. */
4776 slp_tree slp_op0
, slp_op1
= NULL
;
4777 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4778 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4780 if (dump_enabled_p ())
4781 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4782 "use not simple.\n");
4786 rhs_type
= TREE_TYPE (op0
);
4787 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4788 && !((INTEGRAL_TYPE_P (lhs_type
)
4789 && INTEGRAL_TYPE_P (rhs_type
))
4790 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4791 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4794 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4795 && ((INTEGRAL_TYPE_P (lhs_type
)
4796 && !type_has_mode_precision_p (lhs_type
))
4797 || (INTEGRAL_TYPE_P (rhs_type
)
4798 && !type_has_mode_precision_p (rhs_type
))))
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4802 "type conversion to/from bit-precision unsupported."
4807 if (op_type
== binary_op
)
4809 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4810 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4812 op1
= gimple_assign_rhs2 (stmt
);
4814 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4815 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4819 "use not simple.\n");
4822 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4825 vectype_in
= vectype1_in
;
4828 /* If op0 is an external or constant def, infer the vector type
4829 from the scalar type. */
4831 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4833 gcc_assert (vectype_in
);
4836 if (dump_enabled_p ())
4837 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4838 "no vectype for scalar type %T\n", rhs_type
);
4843 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4844 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4846 if (dump_enabled_p ())
4847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4848 "can't convert between boolean and non "
4849 "boolean vectors %T\n", rhs_type
);
4854 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4855 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4856 if (known_eq (nunits_out
, nunits_in
))
4861 else if (multiple_p (nunits_out
, nunits_in
))
4865 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4869 /* Multiple types in SLP are handled by creating the appropriate number of
4870 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4874 else if (modifier
== NARROW
)
4875 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4877 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4879 /* Sanity check: make sure that at least one copy of the vectorized stmt
4880 needs to be generated. */
4881 gcc_assert (ncopies
>= 1);
4883 bool found_mode
= false;
4884 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4885 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4886 opt_scalar_mode rhs_mode_iter
;
4888 /* Supportable by target? */
4892 if (code
!= FIX_TRUNC_EXPR
4893 && code
!= FLOAT_EXPR
4894 && !CONVERT_EXPR_CODE_P (code
))
4896 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4900 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4902 "conversion not supported by target.\n");
4906 if (known_eq (nunits_in
, nunits_out
))
4908 if (!supportable_half_widening_operation (code
, vectype_out
,
4909 vectype_in
, &code1
))
4911 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4914 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
4915 vectype_out
, vectype_in
, &code1
,
4916 &code2
, &multi_step_cvt
,
4919 /* Binary widening operation can only be supported directly by the
4921 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4925 if (code
!= FLOAT_EXPR
4926 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4929 fltsz
= GET_MODE_SIZE (lhs_mode
);
4930 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4932 rhs_mode
= rhs_mode_iter
.require ();
4933 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4937 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4938 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4939 if (cvt_type
== NULL_TREE
)
4942 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4944 if (!supportable_convert_operation (code
, vectype_out
,
4945 cvt_type
, &codecvt1
))
4948 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4949 vectype_out
, cvt_type
,
4950 &codecvt1
, &codecvt2
,
4955 gcc_assert (multi_step_cvt
== 0);
4957 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4959 vectype_in
, &code1
, &code2
,
4960 &multi_step_cvt
, &interm_types
))
4970 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4971 codecvt2
= ERROR_MARK
;
4975 interm_types
.safe_push (cvt_type
);
4976 cvt_type
= NULL_TREE
;
4981 gcc_assert (op_type
== unary_op
);
4982 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4983 &code1
, &multi_step_cvt
,
4987 if (code
!= FIX_TRUNC_EXPR
4988 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4993 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4994 if (cvt_type
== NULL_TREE
)
4996 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4999 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5000 &code1
, &multi_step_cvt
,
5009 if (!vec_stmt
) /* transformation not required. */
5012 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5013 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5017 "incompatible vector types for invariants\n");
5020 DUMP_VECT_SCOPE ("vectorizable_conversion");
5021 if (modifier
== NONE
)
5023 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5024 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5027 else if (modifier
== NARROW
)
5029 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5030 /* The final packing step produces one vector result per copy. */
5031 unsigned int nvectors
5032 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5033 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5034 multi_step_cvt
, cost_vec
,
5039 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5040 /* The initial unpacking step produces two vector results
5041 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5042 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5043 unsigned int nvectors
5045 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5047 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5048 multi_step_cvt
, cost_vec
,
5051 interm_types
.release ();
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_NOTE
, vect_location
,
5058 "transform conversion. ncopies = %d.\n", ncopies
);
5060 if (op_type
== binary_op
)
5062 if (CONSTANT_CLASS_P (op0
))
5063 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5064 else if (CONSTANT_CLASS_P (op1
))
5065 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5068 /* In case of multi-step conversion, we first generate conversion operations
5069 to the intermediate types, and then from that types to the final one.
5070 We create vector destinations for the intermediate type (TYPES) received
5071 from supportable_*_operation, and store them in the correct order
5072 for future use in vect_create_vectorized_*_stmts (). */
5073 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5074 vec_dest
= vect_create_destination_var (scalar_dest
,
5075 (cvt_type
&& modifier
== WIDEN
)
5076 ? cvt_type
: vectype_out
);
5077 vec_dsts
.quick_push (vec_dest
);
5081 for (i
= interm_types
.length () - 1;
5082 interm_types
.iterate (i
, &intermediate_type
); i
--)
5084 vec_dest
= vect_create_destination_var (scalar_dest
,
5086 vec_dsts
.quick_push (vec_dest
);
5091 vec_dest
= vect_create_destination_var (scalar_dest
,
5093 ? vectype_out
: cvt_type
);
5098 if (modifier
== WIDEN
)
5100 else if (modifier
== NARROW
)
5103 ninputs
= vect_pow2 (multi_step_cvt
);
5111 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5113 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5115 /* Arguments are ready, create the new vector stmt. */
5116 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5117 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5118 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5119 gimple_assign_set_lhs (new_stmt
, new_temp
);
5120 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5123 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5125 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5130 /* In case the vectorization factor (VF) is bigger than the number
5131 of elements that we can fit in a vectype (nunits), we have to
5132 generate more than one vector stmt - i.e - we need to "unroll"
5133 the vector stmt by a factor VF/nunits. */
5134 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5136 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5138 if (code
== WIDEN_LSHIFT_EXPR
)
5140 int oprnds_size
= vec_oprnds0
.length ();
5141 vec_oprnds1
.create (oprnds_size
);
5142 for (i
= 0; i
< oprnds_size
; ++i
)
5143 vec_oprnds1
.quick_push (op1
);
5145 /* Arguments are ready. Create the new vector stmts. */
5146 for (i
= multi_step_cvt
; i
>= 0; i
--)
5148 tree this_dest
= vec_dsts
[i
];
5149 enum tree_code c1
= code1
, c2
= code2
;
5150 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5155 if (known_eq (nunits_out
, nunits_in
))
5156 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5157 &vec_oprnds1
, stmt_info
,
5161 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5162 &vec_oprnds1
, stmt_info
,
5167 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5172 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5173 new_temp
= make_ssa_name (vec_dest
);
5174 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5175 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5178 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5181 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5183 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5188 /* In case the vectorization factor (VF) is bigger than the number
5189 of elements that we can fit in a vectype (nunits), we have to
5190 generate more than one vector stmt - i.e - we need to "unroll"
5191 the vector stmt by a factor VF/nunits. */
5192 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5194 /* Arguments are ready. Create the new vector stmts. */
5196 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5198 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5199 new_temp
= make_ssa_name (vec_dest
);
5201 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5202 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5203 vec_oprnds0
[i
] = new_temp
;
5206 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5208 stmt_info
, vec_dsts
, gsi
,
5213 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5215 vec_oprnds0
.release ();
5216 vec_oprnds1
.release ();
5217 interm_types
.release ();
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
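
/* The typical case is a plain copy or a conversion that leaves the value
   representation unchanged, e.g. the SSA copy feeding the store in

     for (i = 0; i < N; i++)
       a[i] = b[i];

   The copy becomes a single vector-to-vector assignment, possibly through
   a VIEW_CONVERT_EXPR when only the signedness of the vector type
   differs.  */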
static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
5262 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5264 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5268 vec
<tree
> vec_oprnds
= vNULL
;
5270 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5271 enum tree_code code
;
5274 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5277 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5281 /* Is vectorizable assignment? */
5282 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5286 scalar_dest
= gimple_assign_lhs (stmt
);
5287 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5290 if (STMT_VINFO_DATA_REF (stmt_info
))
5293 code
= gimple_assign_rhs_code (stmt
);
5294 if (!(gimple_assign_single_p (stmt
)
5295 || code
== PAREN_EXPR
5296 || CONVERT_EXPR_CODE_P (code
)))
5299 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5300 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5302 /* Multiple types in SLP are handled by creating the appropriate number of
5303 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5308 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5310 gcc_assert (ncopies
>= 1);
5313 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5314 &dt
[0], &vectype_in
))
5316 if (dump_enabled_p ())
5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5318 "use not simple.\n");
5322 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5324 /* We can handle NOP_EXPR conversions that do not change the number
5325 of elements or the vector size. */
5326 if ((CONVERT_EXPR_CODE_P (code
)
5327 || code
== VIEW_CONVERT_EXPR
)
5329 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5330 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5331 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5334 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5335 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5339 "can't convert between boolean and non "
5340 "boolean vectors %T\n", TREE_TYPE (op
));
5345 /* We do not handle bit-precision changes. */
5346 if ((CONVERT_EXPR_CODE_P (code
)
5347 || code
== VIEW_CONVERT_EXPR
)
5348 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5349 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5350 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5351 /* But a conversion that does not change the bit-pattern is ok. */
5352 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5353 > TYPE_PRECISION (TREE_TYPE (op
)))
5354 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5358 "type conversion to/from bit-precision "
5363 if (!vec_stmt
) /* transformation not required. */
5366 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5368 if (dump_enabled_p ())
5369 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5370 "incompatible vector types for invariants\n");
5373 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5374 DUMP_VECT_SCOPE ("vectorizable_assignment");
5375 if (!vect_nop_conversion_p (stmt_info
))
5376 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5382 if (dump_enabled_p ())
5383 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5386 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5389 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5391 /* Arguments are ready. create the new vector stmt. */
5392 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5394 if (CONVERT_EXPR_CODE_P (code
)
5395 || code
== VIEW_CONVERT_EXPR
)
5396 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5397 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5398 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5399 gimple_assign_set_lhs (new_stmt
, new_temp
);
5400 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5402 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5404 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5407 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5409 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
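
/* For example, in

     for (i = 0; i < N; i++)
       a[i] = b[i] << s;

   the shift amount S is loop-invariant, so the vector-by-scalar shift
   optab can be used with S kept as a scalar operand, whereas in

     for (i = 0; i < N; i++)
       a[i] = b[i] << c[i];

   every element has its own count and the vector-by-vector shift optab is
   required.  The analysis below chooses between the two forms.  */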
static bool
vectorizable_shift (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
5465 tree op0
, op1
= NULL
;
5466 tree vec_oprnd1
= NULL_TREE
;
5468 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5469 enum tree_code code
;
5470 machine_mode vec_mode
;
5474 machine_mode optab_op2_mode
;
5475 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5477 poly_uint64 nunits_in
;
5478 poly_uint64 nunits_out
;
5483 vec
<tree
> vec_oprnds0
= vNULL
;
5484 vec
<tree
> vec_oprnds1
= vNULL
;
5487 bool scalar_shift_arg
= true;
5488 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5489 bool incompatible_op1_vectype_p
= false;
5491 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5494 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5495 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5499 /* Is STMT a vectorizable binary/unary operation? */
5500 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5504 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5507 code
= gimple_assign_rhs_code (stmt
);
5509 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5510 || code
== RROTATE_EXPR
))
5513 scalar_dest
= gimple_assign_lhs (stmt
);
5514 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5515 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5519 "bit-precision shifts not supported.\n");
5524 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5525 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5529 "use not simple.\n");
5532 /* If op0 is an external or constant def, infer the vector type
5533 from the scalar type. */
5535 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5537 gcc_assert (vectype
);
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5542 "no vectype for scalar type\n");
5546 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5547 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5548 if (maybe_ne (nunits_out
, nunits_in
))
5551 stmt_vec_info op1_def_stmt_info
;
5553 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5554 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5558 "use not simple.\n");
5562 /* Multiple types in SLP are handled by creating the appropriate number of
5563 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5568 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5570 gcc_assert (ncopies
>= 1);
5572 /* Determine whether the shift amount is a vector, or scalar. If the
5573 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5575 if ((dt
[1] == vect_internal_def
5576 || dt
[1] == vect_induction_def
5577 || dt
[1] == vect_nested_cycle
)
5579 scalar_shift_arg
= false;
5580 else if (dt
[1] == vect_constant_def
5581 || dt
[1] == vect_external_def
5582 || dt
[1] == vect_internal_def
)
5584 /* In SLP, need to check whether the shift count is the same,
5585 in loops if it is a constant or invariant, it is always
5589 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5590 stmt_vec_info slpstmt_info
;
5592 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5594 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5595 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5596 scalar_shift_arg
= false;
5599 /* For internal SLP defs we have to make sure we see scalar stmts
5600 for all vector elements.
5601 ??? For different vectors we could resort to a different
5602 scalar shift operand but code-generation below simply always
5604 if (dt
[1] == vect_internal_def
5605 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5607 scalar_shift_arg
= false;
5610 /* If the shift amount is computed by a pattern stmt we cannot
5611 use the scalar amount directly thus give up and use a vector
5613 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5614 scalar_shift_arg
= false;
5618 if (dump_enabled_p ())
5619 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5620 "operand mode requires invariant argument.\n");
5624 /* Vector shifted by vector. */
5625 bool was_scalar_shift_arg
= scalar_shift_arg
;
5626 if (!scalar_shift_arg
)
5628 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5629 if (dump_enabled_p ())
5630 dump_printf_loc (MSG_NOTE
, vect_location
,
5631 "vector/vector shift/rotate found.\n");
5634 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5636 incompatible_op1_vectype_p
5637 = (op1_vectype
== NULL_TREE
5638 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5639 TYPE_VECTOR_SUBPARTS (vectype
))
5640 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5641 if (incompatible_op1_vectype_p
5643 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5644 || slp_op1
->refcnt
!= 1))
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5648 "unusable type for last operand in"
5649 " vector/vector shift/rotate.\n");
5653 /* See if the machine has a vector shifted by scalar insn and if not
5654 then see if it has a vector shifted by vector insn. */
5657 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5659 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE
, vect_location
,
5663 "vector/scalar shift/rotate found.\n");
5667 optab
	= optab_for_tree_code (code, vectype, optab_vector);
      if (optab
	  && (optab_handler (optab, TYPE_MODE (vectype))
	      != CODE_FOR_nothing))
	{
	  scalar_shift_arg = false;

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/vector shift/rotate found.\n");

	  if (!op1_vectype)
	    op1_vectype = get_vectype_for_scalar_type (vinfo,
						       TREE_TYPE (op1),
						       slp_op1);

	  /* Unlike the other binary operators, shifts/rotates have
	     the rhs being int, instead of the same type as the lhs,
	     so make sure the scalar is the right type if we are
	     dealing with vectors of long long/long/short/char.  */
	  incompatible_op1_vectype_p
	    = (!op1_vectype
	       || !tree_nop_conversion_p (TREE_TYPE (vectype),
					  TREE_TYPE (op1)));
	  if (incompatible_op1_vectype_p
	      && dt[1] == vect_internal_def)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unusable type for last operand in"
				 " vector/vector shift/rotate.\n");
	      return false;
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      return false;
    }
  /* vector lowering cannot optimize vector shifts using word arithmetic.  */
  if (vect_emulated_vector_p (vectype))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
		  && (!incompatible_op1_vectype_p
		      || dt[1] == vect_constant_def)
		  && !vect_maybe_update_slp_op_vectype
			(slp_op1,
			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      /* Now adjust the constant shift amount in place.  */
      if (slp_node
	  && incompatible_op1_vectype_p
	  && dt[1] == vect_constant_def)
	{
	  for (unsigned i = 0;
	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
	    {
	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
		= fold_convert (TREE_TYPE (vectype),
				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
			   == INTEGER_CST));
	    }
	}
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  if (incompatible_op1_vectype_p && !slp_node)
    {
      gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
      op1 = fold_convert (TREE_TYPE (vectype), op1);
      if (dt[1] != vect_constant_def)
	op1 = vect_init_vector (vinfo, stmt_info, op1,
				TREE_TYPE (vectype), NULL);
    }

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  if (scalar_shift_arg && dt[1] != vect_internal_def)
    {
      /* Vector shl and shr insn patterns can be defined with scalar
	 operand 2 (shift operand).  In this case, use constant or loop
	 invariant op1 directly, without extending it to vector mode
	 first.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (!VECTOR_MODE_P (optab_op2_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "operand 1 using scalar mode.\n");
	  vec_oprnd1 = op1;
	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
	  vec_oprnds1.quick_push (vec_oprnd1);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
	  for (k = 0;
	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1);
	       k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
    }
  else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
    {
      if (was_scalar_shift_arg)
	{
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
	  op1 = fold_convert (TREE_TYPE (vectype), op1);
	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
				  !loop_vinfo ? gsi : NULL);
	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
					 !loop_vinfo ? gsi : NULL);
	  vec_oprnds1.create (slp_node->vec_stmts_size);
	  for (k = 0; k < slp_node->vec_stmts_size; k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
      else if (dt[1] == vect_constant_def)
	/* The constant shift amount has been adjusted in place.  */
	;
      else
	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
    }

  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0,
		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
      if (scalar_shift_arg && dt[1] == vect_internal_def)
	{
	  vop1 = vec_oprnds1[0];
	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
	  gassign *new_stmt
	    = gimple_build_assign (new_temp,
				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
					   vop1,
					   TYPE_SIZE (TREE_TYPE (new_temp)),
					   bitsize_zero_node));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  vop1 = new_temp;
	}
      else
	vop1 = vec_oprnds1[i];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
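/* Illustrative sketch, for exposition only (hypothetical arrays a, b, c and
   scalar s): the two shift shapes distinguished above.

     for (int i = 0; i < n; i++)
       a[i] = b[i] << s;        // loop-invariant amount: a vector/scalar
				// shift, SCALAR_SHIFT_ARG stays true

     for (int i = 0; i < n; i++)
       a[i] = b[i] << c[i];     // lane-varying amount: needs a vector/vector
				// shift, with op1 converted to the element
				// type of VECTYPE first.  */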
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies, vec_num;
  int i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  /* Loads and stores are handled in vectorizable_{load,store}.  */
  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* Shifts are handled in vectorizable_shift.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Comparisons are handled in vectorizable_comparison.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    return false;

  /* Conditions are handled in vectorizable_condition.  */
  if (code == COND_EXPR)
    return false;

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
  if (!mask_op_p
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  slp_tree slp_op0;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &op0, &slp_op0, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
					       slp_node);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n",
			 TREE_TYPE (op0));
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
  slp_tree slp_op1 = NULL, slp_op2 = NULL;
  if (op_type == binary_op || op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       1, &op1, &slp_op1, &dt[1], &vectype2))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       2, &op2, &slp_op2, &dt[2], &vectype3))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vec_num = 1;
    }

  gcc_assert (ncopies >= 1);

  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }
  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);
    }

  bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
      using_emulated_vectors_p = true;
    }

  if (using_emulated_vectors_p
      && !vect_can_vectorize_without_simd_p (code))
    {
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE, "using word mode not possible.\n");
      return false;
    }

  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  if (!vec_stmt) /* transformation not required.  */
    {
      /* If this operation is part of a reduction, a fully-masked loop
	 should only change the active lanes of the reduction chain,
	 keeping the inactive lanes as-is.  */
      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && reduc_idx >= 0)
	{
	  if (cond_fn == IFN_LAST
	      || !direct_internal_fn_supported_p (cond_fn, vectype,
						  OPTIMIZE_FOR_SPEED))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else
	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
				   vectype, NULL);
	}

      /* Put types on constant and invariant SLP children.  */
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (using_emulated_vectors_p)
	{
	  /* The above vect_model_simple_cost call handles constants
	     in the prologue and (mis-)costs one of the stmts as
	     vector stmt.  See tree-vect-generic.c:do_plus_minus/do_negate
	     for the actual lowering that will be applied.  */
	  unsigned int n
	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
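/* Illustrative sketch, for exposition only, of why POINTER_DIFF_EXPR gets
   the extra VEC_CVT_DEST above (hypothetical pointer arrays p, q and
   result array d):

     d[i] = p[i] - q[i];   // ptrdiff_t result

   The pointer operands are vectorized as vectors of unsigned elements, so
   the MINUS_EXPR is computed in VECTYPE and the result is then
   VIEW_CONVERT_EXPRed to the signed VECTYPE_OUT.  */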
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      gimple *new_stmt = NULL;
      vop1 = ((op_type == binary_op || op_type == ternary_op)
	      ? vec_oprnds1[i] : NULL_TREE);
      vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
      if (masked_loop_p && reduc_idx >= 0)
	{
	  /* Perform the operation on active elements only and take
	     inactive elements from the reduction chain input.  */
	  vop2 = reduc_idx == 1 ? vop1 : vop0;
	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
					  vectype, i);
	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
						    vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, call);
	  gimple_call_set_lhs (call, new_temp);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  new_stmt = call;
	}
      else
	{
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info,
					   new_stmt, gsi);
	    }
	}
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
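/* Illustrative sketch, for exposition only, of the conditional internal
   function call built above for reductions in a fully-masked loop.  For
   code == PLUS_EXPR the function is IFN_COND_ADD and each lane l behaves
   roughly like

     res[l] = mask[l] ? vop0[l] + vop1[l] : vop2[l];

   where vop2 is the reduction chain input, so inactive lanes simply pass
   the accumulator through unchanged.  */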
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
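/* Illustrative sketch, for exposition only: if the vectorized accesses are
   to a hypothetical file-scope array

     static float data[256];

   whose data reference was recorded as base_misaligned, the code above
   raises the declared alignment of 'data' to DR_TARGET_ALIGNMENT so that
   the vector loads and stores to it can be emitted as aligned accesses.  */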
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
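/* Illustrative sketch, for exposition only: for a grouped store to a
   hypothetical structure

     struct S { int i; float f; } *s;
     s[k].i = ...;  s[k].f = ...;

   the two members have different alias sets, so the function above returns
   ptr_type_node for the whole group, which is conservatively correct for
   every member, instead of a member-specific alias pointer type.  */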
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  for (int i = 0; i < 2; ++i)
    {
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
				     &offset[i], &mode, &unsignedp,
				     &reversep, &volatilep);
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
	return false;
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	    {
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
		return false;
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);
	    }
	}
    }

  if (!operand_equal_p (base[0], base[1], 0))
    return false;
  if (maybe_ne (bitsize[0], bitsize[1]))
    return false;
  if (offset[0] != offset[1])
    {
      if (!offset[0] || !offset[1])
	return false;
      if (!operand_equal_p (offset[0], offset[1], 0))
	{
	  tree step[2];
	  for (int i = 0; i < 2; ++i)
	    {
	      step[i] = integer_one_node;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
		    }
		}
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		{
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
		}
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
		}
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
	      if (rhs1
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
		offset[i] = rhs1;
	    }
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
	    return false;
	}
    }
  return true;
}
enum scan_store_kind
{
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  int i;
  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	for (j = 0; j < nunits; ++j)
	  sel[j] = nunits - 1;
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	{
	  if (i == units_log2)
	    return -1;

	  if (whole_vector_shift_kind == scan_store_kind_perm)
	    {
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
		return -1;
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		{
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		    return -1;
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
		}
	    }
	  kind = whole_vector_shift_kind;
	}
      if (use_whole_vector)
	{
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
	}
    }

  return units_log2;
}
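/* Illustrative sketch, for exposition only, of the units_log2 permutation
   steps whose availability is checked above.  For 8 lanes, operation + and
   input v, an inclusive scan can be built as

     v1 = v  + shift_up (v, 1);    // v1[j] = v[j-1] + v[j]
     v2 = v1 + shift_up (v1, 2);
     v3 = v2 + shift_up (v2, 4);   // v3[j] = v[0] + ... + v[j]

   followed by a final permutation that broadcasts lane 7 (the total).
   shift_up is a hypothetical helper standing for the VEC_PERM_EXPRs or
   whole-vector shifts selected above; lanes shifted in take the scan's
   init value.  */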
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type;

  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
  if (slp
      || mask
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
      || !integer_zerop (DR_INIT (dr_info->dr))
      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
      || !alias_sets_conflict_p (get_alias_set (vectype),
				 get_alias_set (TREE_TYPE (ref_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan store.\n");
      return false;
    }
6613 /* We need to pattern match code built by OpenMP lowering and simplified
6614 by following optimizations into something we can handle.
6615 #pragma omp simd reduction(inscan,+:r)
6619 #pragma omp scan inclusive (r)
6622 shall have body with:
6623 // Initialization for input phase, store the reduction initializer:
6624 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6625 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6627 // Actual input phase:
6629 r.0_5 = D.2042[_20];
6632 // Initialization for scan phase:
6633 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6639 // Actual scan phase:
6641 r.1_8 = D.2042[_20];
6643 The "omp simd array" variable D.2042 holds the privatized copy used
6644 inside of the loop and D.2043 is another one that holds copies of
6645 the current original list item. The separate GOMP_SIMD_LANE ifn
6646 kinds are there in order to allow optimizing the initializer store
6647 and combiner sequence, e.g. if it is originally some C++ish user
6648 defined reduction, but allow the vectorizer to pattern recognize it
6649 and turn into the appropriate vectorized scan.
6651 For exclusive scan, this is slightly different:
6652 #pragma omp simd reduction(inscan,+:r)
6656 #pragma omp scan exclusive (r)
6659 shall have body with:
6660 // Initialization for input phase, store the reduction initializer:
6661 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6662 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6664 // Actual input phase:
6666 r.0_5 = D.2042[_20];
6669 // Initialization for scan phase:
6670 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6676 // Actual scan phase:
6678 r.1_8 = D.2044[_20];
6681 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6683 /* Match the D.2042[_21] = 0; store above. Just require that
6684 it is a constant or external definition store. */
6685 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6688 if (dump_enabled_p ())
6689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6690 "unsupported OpenMP scan initializer store.\n");
6694 if (! loop_vinfo
->scan_map
)
6695 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6696 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6697 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6700 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6702 /* These stores can be vectorized normally. */
6706 if (rhs_dt
!= vect_internal_def
)
6709 if (dump_enabled_p ())
6710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6711 "unsupported OpenMP scan combiner pattern.\n");
6715 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6716 tree rhs
= gimple_assign_rhs1 (stmt
);
6717 if (TREE_CODE (rhs
) != SSA_NAME
)
6720 gimple
*other_store_stmt
= NULL
;
6721 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6722 bool inscan_var_store
6723 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6725 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6727 if (!inscan_var_store
)
6729 use_operand_p use_p
;
6730 imm_use_iterator iter
;
6731 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6733 gimple
*use_stmt
= USE_STMT (use_p
);
6734 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6736 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6737 || !is_gimple_assign (use_stmt
)
6738 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6740 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6742 other_store_stmt
= use_stmt
;
6744 if (other_store_stmt
== NULL
)
6746 rhs
= gimple_assign_lhs (other_store_stmt
);
6747 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6751 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6753 use_operand_p use_p
;
6754 imm_use_iterator iter
;
6755 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6757 gimple
*use_stmt
= USE_STMT (use_p
);
6758 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6760 if (other_store_stmt
)
6762 other_store_stmt
= use_stmt
;
6768 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6769 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6770 || !is_gimple_assign (def_stmt
)
6771 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6774 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6775 /* For pointer addition, we should use the normal plus for the vector
6779 case POINTER_PLUS_EXPR
:
6782 case MULT_HIGHPART_EXPR
:
6787 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6790 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6791 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6792 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6795 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6796 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6797 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6798 || !gimple_assign_load_p (load1_stmt
)
6799 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6800 || !gimple_assign_load_p (load2_stmt
))
6803 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6804 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6805 if (load1_stmt_info
== NULL
6806 || load2_stmt_info
== NULL
6807 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6808 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6809 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6810 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6813 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6815 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6816 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6817 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6819 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6821 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6825 use_operand_p use_p
;
6826 imm_use_iterator iter
;
6827 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6829 gimple
*use_stmt
= USE_STMT (use_p
);
6830 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6832 if (other_store_stmt
)
6834 other_store_stmt
= use_stmt
;
6838 if (other_store_stmt
== NULL
)
6840 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6841 || !gimple_store_p (other_store_stmt
))
6844 stmt_vec_info other_store_stmt_info
6845 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6846 if (other_store_stmt_info
== NULL
6847 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6848 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6851 gimple
*stmt1
= stmt
;
6852 gimple
*stmt2
= other_store_stmt
;
6853 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6854 std::swap (stmt1
, stmt2
);
6855 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6856 gimple_assign_rhs1 (load2_stmt
)))
6858 std::swap (rhs1
, rhs2
);
6859 std::swap (load1_stmt
, load2_stmt
);
6860 std::swap (load1_stmt_info
, load2_stmt_info
);
6862 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6863 gimple_assign_rhs1 (load1_stmt
)))
6866 tree var3
= NULL_TREE
;
6867 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6868 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6869 gimple_assign_rhs1 (load2_stmt
)))
6871 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6873 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6874 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6875 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6877 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6878 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6879 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6880 || lookup_attribute ("omp simd inscan exclusive",
6881 DECL_ATTRIBUTES (var3
)))
6885 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6886 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6887 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6890 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6891 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6892 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6893 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6894 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6895 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6898 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6899 std::swap (var1
, var2
);
6901 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6903 if (!lookup_attribute ("omp simd inscan exclusive",
6904 DECL_ATTRIBUTES (var1
)))
6909 if (loop_vinfo
->scan_map
== NULL
)
6911 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6915 /* The IL is as expected, now check if we can actually vectorize it.
6922 should be vectorized as (where _40 is the vectorized rhs
6923 from the D.2042[_21] = 0; store):
6924 _30 = MEM <vector(8) int> [(int *)&D.2043];
6925 _31 = MEM <vector(8) int> [(int *)&D.2042];
6926 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6928 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6929 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6931 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6932 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6933 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6935 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6936 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6938 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6939 MEM <vector(8) int> [(int *)&D.2043] = _39;
6940 MEM <vector(8) int> [(int *)&D.2042] = _38;
6947 should be vectorized as (where _40 is the vectorized rhs
6948 from the D.2042[_21] = 0; store):
6949 _30 = MEM <vector(8) int> [(int *)&D.2043];
6950 _31 = MEM <vector(8) int> [(int *)&D.2042];
6951 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6952 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6954 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6955 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6956 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6958 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6959 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6960 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6962 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6963 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6966 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6967 MEM <vector(8) int> [(int *)&D.2044] = _39;
6968 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6969 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6970 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6971 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
6974 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
6975 if (units_log2
== -1)
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7005 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7006 bool inscan_var_store
7007 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7009 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7011 use_operand_p use_p
;
7012 imm_use_iterator iter
;
7013 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7015 gimple
*use_stmt
= USE_STMT (use_p
);
7016 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7018 rhs
= gimple_assign_lhs (use_stmt
);
7023 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7024 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7025 if (code
== POINTER_PLUS_EXPR
)
7027 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7028 && commutative_tree_code (code
));
7029 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7030 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7031 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7032 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7033 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7034 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7035 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7036 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7037 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7038 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7039 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7041 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7043 std::swap (rhs1
, rhs2
);
7044 std::swap (var1
, var2
);
7045 std::swap (load1_dr_info
, load2_dr_info
);
7048 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7051 unsigned HOST_WIDE_INT nunits
;
7052 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7054 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7055 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7056 gcc_assert (units_log2
> 0);
7057 auto_vec
<tree
, 16> perms
;
7058 perms
.quick_grow (units_log2
+ 1);
7059 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7060 for (int i
= 0; i
<= units_log2
; ++i
)
7062 unsigned HOST_WIDE_INT j
, k
;
7063 vec_perm_builder
sel (nunits
, nunits
, 1);
7064 sel
.quick_grow (nunits
);
7065 if (i
== units_log2
)
7066 for (j
= 0; j
< nunits
; ++j
)
7067 sel
[j
] = nunits
- 1;
7070 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7072 for (k
= 0; j
< nunits
; ++j
, ++k
)
7073 sel
[j
] = nunits
+ k
;
7075 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7076 if (!use_whole_vector
.is_empty ()
7077 && use_whole_vector
[i
] != scan_store_kind_perm
)
7079 if (zero_vec
== NULL_TREE
)
7080 zero_vec
= build_zero_cst (vectype
);
7081 if (masktype
== NULL_TREE
7082 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7083 masktype
= truth_type_for (vectype
);
7084 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7087 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7090 tree vec_oprnd1
= NULL_TREE
;
7091 tree vec_oprnd2
= NULL_TREE
;
7092 tree vec_oprnd3
= NULL_TREE
;
7093 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7094 tree dataref_offset
= build_int_cst (ref_type
, 0);
7095 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7096 vectype
, VMAT_CONTIGUOUS
);
7097 tree ldataref_ptr
= NULL_TREE
;
7098 tree orig
= NULL_TREE
;
7099 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7100 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7101 auto_vec
<tree
> vec_oprnds1
;
7102 auto_vec
<tree
> vec_oprnds2
;
7103 auto_vec
<tree
> vec_oprnds3
;
7104 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7105 *init
, &vec_oprnds1
,
7106 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7107 rhs2
, &vec_oprnds3
);
7108 for (int j
= 0; j
< ncopies
; j
++)
7110 vec_oprnd1
= vec_oprnds1
[j
];
7111 if (ldataref_ptr
== NULL
)
7112 vec_oprnd2
= vec_oprnds2
[j
];
7113 vec_oprnd3
= vec_oprnds3
[j
];
7116 else if (!inscan_var_store
)
7117 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7121 vec_oprnd2
= make_ssa_name (vectype
);
7122 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7123 unshare_expr (ldataref_ptr
),
7125 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7126 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7127 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7128 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7129 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7132 tree v
= vec_oprnd2
;
7133 for (int i
= 0; i
< units_log2
; ++i
)
7135 tree new_temp
= make_ssa_name (vectype
);
7136 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7138 && (use_whole_vector
[i
]
7139 != scan_store_kind_perm
))
7140 ? zero_vec
: vec_oprnd1
, v
,
7142 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7143 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7144 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7146 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7148 /* Whole vector shift shifted in zero bits, but if *init
7149 is not initializer_zerop, we need to replace those elements
7150 with elements from vec_oprnd1. */
7151 tree_vector_builder
vb (masktype
, nunits
, 1);
7152 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7153 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7154 ? boolean_false_node
: boolean_true_node
);
7156 tree new_temp2
= make_ssa_name (vectype
);
7157 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7158 new_temp
, vec_oprnd1
);
7159 vect_finish_stmt_generation (vinfo
, stmt_info
,
7161 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7162 new_temp
= new_temp2
;
7165 /* For exclusive scan, perform the perms[i] permutation once
7168 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7176 tree new_temp2
= make_ssa_name (vectype
);
7177 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7178 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7179 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7184 tree new_temp
= make_ssa_name (vectype
);
7185 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7186 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7187 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7189 tree last_perm_arg
= new_temp
;
7190 /* For exclusive scan, new_temp computed above is the exclusive scan
7191 prefix sum. Turn it into inclusive prefix sum for the broadcast
7192 of the last element into orig. */
7193 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7195 last_perm_arg
= make_ssa_name (vectype
);
7196 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7197 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7198 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7201 orig
= make_ssa_name (vectype
);
7202 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7203 last_perm_arg
, perms
[units_log2
]);
7204 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7205 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7207 if (!inscan_var_store
)
7209 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7210 unshare_expr (dataref_ptr
),
7212 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7213 g
= gimple_build_assign (data_ref
, new_temp
);
7214 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7215 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7219 if (inscan_var_store
)
7220 for (int j
= 0; j
< ncopies
; j
++)
7223 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7225 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7226 unshare_expr (dataref_ptr
),
7228 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7229 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7230 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7231 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
  tree op;
  tree vec_oprnd = NULL_TREE;
  tree elem_type;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  machine_mode vec_mode;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  stmt_vec_info first_stmt_info;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  tree ref_type;
  gather_scatter_info gs_info;
  poly_uint64 vf;
  vec_load_store_type vls_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;
7288 /* Is vectorizable store? */
7290 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7291 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7293 tree scalar_dest
= gimple_assign_lhs (assign
);
7294 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7295 && is_pattern_stmt_p (stmt_info
))
7296 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7297 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7298 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7299 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7300 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7301 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7302 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7303 && TREE_CODE (scalar_dest
) != MEM_REF
)
7308 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7309 if (!call
|| !gimple_call_internal_p (call
))
7312 internal_fn ifn
= gimple_call_internal_fn (call
);
7313 if (!internal_store_fn_p (ifn
))
7316 if (slp_node
!= NULL
)
7318 if (dump_enabled_p ())
7319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7320 "SLP of masked stores not supported.\n");
7324 int mask_index
= internal_fn_mask_index (ifn
);
7326 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7327 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7331 op
= vect_get_store_rhs (stmt_info
);
7333 /* Cannot have hybrid store SLP -- that would mean storing to the
7334 same location twice. */
7335 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7337 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7338 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7342 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7343 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7348 /* Multiple types in SLP are handled by creating the appropriate number of
7349 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7354 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7356 gcc_assert (ncopies
>= 1);
7358 /* FORNOW. This restriction should be relaxed. */
7359 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7361 if (dump_enabled_p ())
7362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7363 "multiple types in nested loop.\n");
7367 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7368 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7371 elem_type
= TREE_TYPE (vectype
);
7372 vec_mode
= TYPE_MODE (vectype
);
7374 if (!STMT_VINFO_DATA_REF (stmt_info
))
7377 vect_memory_access_type memory_access_type
;
7378 enum dr_alignment_support alignment_support_scheme
;
7381 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7382 ncopies
, &memory_access_type
, &poffset
,
7383 &alignment_support_scheme
, &misalignment
, &gs_info
))
7388 if (memory_access_type
== VMAT_CONTIGUOUS
)
7390 if (!VECTOR_MODE_P (vec_mode
)
7391 || !can_vec_mask_load_store_p (vec_mode
,
7392 TYPE_MODE (mask_vectype
), false))
7395 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7396 && (memory_access_type
!= VMAT_GATHER_SCATTER
7397 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7399 if (dump_enabled_p ())
7400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7401 "unsupported access type for masked store.\n");
7407 /* FORNOW. In some cases can vectorize even if data-type not supported
7408 (e.g. - array initialization with 0). */
7409 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7413 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7414 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7415 && memory_access_type
!= VMAT_GATHER_SCATTER
7416 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7419 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7420 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7421 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7425 first_stmt_info
= stmt_info
;
7426 first_dr_info
= dr_info
;
7427 group_size
= vec_num
= 1;
7430 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7432 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7433 memory_access_type
))
7437 if (!vec_stmt
) /* transformation not required. */
7439 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7442 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7443 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7444 group_size
, memory_access_type
,
7448 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7451 if (dump_enabled_p ())
7452 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7453 "incompatible vector types for invariants\n");
7457 if (dump_enabled_p ()
7458 && memory_access_type
!= VMAT_ELEMENTWISE
7459 && memory_access_type
!= VMAT_GATHER_SCATTER
7460 && alignment_support_scheme
!= dr_aligned
)
7461 dump_printf_loc (MSG_NOTE
, vect_location
,
7462 "Vectorizing an unaligned access.\n");
7464 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7465 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7466 memory_access_type
, alignment_support_scheme
,
7467 misalignment
, vls_type
, slp_node
, cost_vec
);
7470 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7474 ensure_base_align (dr_info
);
7476 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7478 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7479 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7480 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7481 tree ptr
, var
, scale
, vec_mask
;
7482 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7483 tree mask_halfvectype
= mask_vectype
;
7484 edge pe
= loop_preheader_edge (loop
);
7487 enum { NARROW
, NONE
, WIDEN
} modifier
;
7488 poly_uint64 scatter_off_nunits
7489 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7491 if (known_eq (nunits
, scatter_off_nunits
))
7493 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7497 /* Currently gathers and scatters are only supported for
7498 fixed-length vectors. */
7499 unsigned int count
= scatter_off_nunits
.to_constant ();
7500 vec_perm_builder
sel (count
, count
, 1);
7501 for (i
= 0; i
< (unsigned int) count
; ++i
)
7502 sel
.quick_push (i
| (count
/ 2));
7504 vec_perm_indices
indices (sel
, 1, count
);
7505 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7507 gcc_assert (perm_mask
!= NULL_TREE
);
7509 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7513 /* Currently gathers and scatters are only supported for
7514 fixed-length vectors. */
7515 unsigned int count
= nunits
.to_constant ();
7516 vec_perm_builder
sel (count
, count
, 1);
7517 for (i
= 0; i
< (unsigned int) count
; ++i
)
7518 sel
.quick_push (i
| (count
/ 2));
7520 vec_perm_indices
indices (sel
, 2, count
);
7521 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7522 gcc_assert (perm_mask
!= NULL_TREE
);
7526 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7531 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7532 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7533 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7534 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7535 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7536 scaletype
= TREE_VALUE (arglist
);
7538 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7539 && TREE_CODE (rettype
) == VOID_TYPE
);
7541 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7542 if (!is_gimple_min_invariant (ptr
))
7544 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7545 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7546 gcc_assert (!new_bb
);
7549 if (mask
== NULL_TREE
)
7551 mask_arg
= build_int_cst (masktype
, -1);
7552 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7553 mask_arg
, masktype
, NULL
);
7556 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7558 auto_vec
<tree
> vec_oprnds0
;
7559 auto_vec
<tree
> vec_oprnds1
;
7560 auto_vec
<tree
> vec_masks
;
7563 tree mask_vectype
= truth_type_for (vectype
);
7564 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7566 ? ncopies
/ 2 : ncopies
,
7567 mask
, &vec_masks
, mask_vectype
);
7569 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7571 ? ncopies
/ 2 : ncopies
,
7572 gs_info
.offset
, &vec_oprnds0
);
7573 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7575 ? ncopies
/ 2 : ncopies
,
      for (j = 0; j < ncopies; ++j)
	{
	  if (modifier == WIDEN)
	    {
	      if (j & 1)
		op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
					   perm_mask, stmt_info, gsi);
	      else
		op = vec_oprnd0 = vec_oprnds0[j / 2];
	      src = vec_oprnd1 = vec_oprnds1[j];
	      if (mask)
		mask_op = vec_mask = vec_masks[j];
	    }
	  else if (modifier == NARROW)
	    {
	      if (j & 1)
		src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
					    perm_mask, stmt_info, gsi);
	      else
		src = vec_oprnd1 = vec_oprnds1[j / 2];
	      op = vec_oprnd0 = vec_oprnds0[j];
	      if (mask)
		mask_op = vec_mask = vec_masks[j / 2];
	    }
	  else
	    {
	      op = vec_oprnd0 = vec_oprnds0[j];
	      src = vec_oprnd1 = vec_oprnds1[j];
	      if (mask)
		mask_op = vec_mask = vec_masks[j];
	    }

	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
				    TYPE_VECTOR_SUBPARTS (srctype)));
	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      src = var;
	    }

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				    TYPE_VECTOR_SUBPARTS (idxtype)));
	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      op = var;
	    }

	  if (mask)
	    {
	      mask_arg = mask_op;
	      if (modifier == NARROW)
		{
		  var = vect_get_new_ssa_name (mask_halfvectype,
					       vect_simple_var);
		  new_stmt
		    = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
							: VEC_UNPACK_LO_EXPR,
					   mask_op);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		  mask_arg = var;
		}
	      tree optype = TREE_TYPE (mask_arg);
	      if (TYPE_MODE (masktype) == TYPE_MODE (optype))
		utype = masktype;
	      else
		utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
	      var = vect_get_new_ssa_name (utype, vect_scalar_var);
	      mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
	      new_stmt
		= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      mask_arg = var;
	      if (!useless_type_conversion_p (masktype, utype))
		{
		  gcc_assert (TYPE_PRECISION (utype)
			      <= TYPE_PRECISION (masktype));
		  var = vect_get_new_ssa_name (masktype, vect_scalar_var);
		  new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		  mask_arg = var;
		}
	    }

	  new_stmt = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op,
					src, scale);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}

      *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
      return true;
    }
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
    return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;

  if (grouped_store)
    {
      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));

      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (first_stmt_info)
	  < DR_GROUP_SIZE (first_stmt_info)
	  && !slp)
	{
	  *vec_stmt = NULL;
	  return true;
	}

      if (slp)
	{
	  grouped_store = false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
		      == first_stmt_info);
	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
	  op = vect_get_store_rhs (first_stmt_info);
	}
      else
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
	vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt_info);
    }
  else
    ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform store. ncopies = %d\n", ncopies);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      tree stride_base, stride_step, alias_off;
      tree vec_oprnd;
      tree dr_offset;
      unsigned int g;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));

      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr_info->dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (dr_offset),
			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));

      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
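      /* As a concrete illustration of the scheme above (hypothetical values,
	 not taken from the original comment): assuming a V4SI vectype and a
	 scalar stride of 3 ints, one copy of the vectorized rhs is
	 scalarized into four element stores

	   vectemp = <vectorized rhs>;
	   array[j + 0*3] = BIT_FIELD_REF <vectemp, 32, 0>;
	   array[j + 1*3] = BIT_FIELD_REF <vectemp, 32, 32>;
	   array[j + 2*3] = BIT_FIELD_REF <vectemp, 32, 64>;
	   array[j + 3*3] = BIT_FIELD_REF <vectemp, 32, 96>;

	 i.e. the loop below emits BIT_FIELD_REF extracts and MEM_REF stores,
	 bumping the running offset by stride_step between elements.  */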
7772 unsigned nstores
= const_nunits
;
7774 tree ltype
= elem_type
;
7775 tree lvectype
= vectype
;
7778 if (group_size
< const_nunits
7779 && const_nunits
% group_size
== 0)
7781 nstores
= const_nunits
/ group_size
;
7783 ltype
= build_vector_type (elem_type
, group_size
);
7786 /* First check if vec_extract optab doesn't support extraction
7787 of vector elts directly. */
7788 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7790 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7791 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7792 group_size
).exists (&vmode
)
7793 || (convert_optab_handler (vec_extract_optab
,
7794 TYPE_MODE (vectype
), vmode
)
7795 == CODE_FOR_nothing
))
7797 /* Try to avoid emitting an extract of vector elements
7798 by performing the extracts using an integer type of the
7799 same size, extracting from a vector of those and then
7800 re-interpreting it as the original vector type if
7803 = group_size
* GET_MODE_BITSIZE (elmode
);
7804 unsigned int lnunits
= const_nunits
/ group_size
;
7805 /* If we can't construct such a vector fall back to
7806 element extracts from the original vector type and
7807 element size stores. */
7808 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7809 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7810 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7811 lnunits
).exists (&vmode
)
7812 && (convert_optab_handler (vec_extract_optab
,
7814 != CODE_FOR_nothing
))
7818 ltype
= build_nonstandard_integer_type (lsize
, 1);
7819 lvectype
= build_vector_type (ltype
, nstores
);
7821 /* Else fall back to vector extraction anyway.
7822 Fewer stores are more important than avoiding spilling
7823 of the vector we extract from. Compared to the
7824 construction case in vectorizable_load no store-forwarding
7825 issue exists here for reasonable archs. */
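		  /* A hypothetical illustration of the integer-type variant
		     (element types and modes chosen for the example only):
		     for a group of two SImode stores extracted from a V4SI
		     rhs, the rhs is punned to V2DI and stored as two DImode
		     pieces

		       tmp = VIEW_CONVERT_EXPR<V2DI>(vec_oprnd);
		       MEM <long long> [ptr]          = BIT_FIELD_REF <tmp, 64, 0>;
		       MEM <long long> [ptr + stride] = BIT_FIELD_REF <tmp, 64, 64>;

		     so each store covers a whole group rather than a single
		     element.  */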
7828 else if (group_size
>= const_nunits
7829 && group_size
% const_nunits
== 0)
7832 lnel
= const_nunits
;
7836 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7837 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7840 ivstep
= stride_step
;
7841 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7842 build_int_cst (TREE_TYPE (ivstep
), vf
));
7844 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7846 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7847 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7848 create_iv (stride_base
, ivstep
, NULL
,
7849 loop
, &incr_gsi
, insert_after
,
7851 incr
= gsi_stmt (incr_gsi
);
7853 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7855 alias_off
= build_int_cst (ref_type
, 0);
7856 stmt_vec_info next_stmt_info
= first_stmt_info
;
7857 for (g
= 0; g
< group_size
; g
++)
7859 running_off
= offvar
;
7862 tree size
= TYPE_SIZE_UNIT (ltype
);
7863 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7865 tree newoff
= copy_ssa_name (running_off
, NULL
);
7866 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7868 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7869 running_off
= newoff
;
7872 op
= vect_get_store_rhs (next_stmt_info
);
7873 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7875 unsigned int group_el
= 0;
7876 unsigned HOST_WIDE_INT
7877 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7878 for (j
= 0; j
< ncopies
; j
++)
7880 vec_oprnd
= vec_oprnds
[j
];
7881 /* Pun the vector to extract from if necessary. */
7882 if (lvectype
!= vectype
)
7884 tree tem
= make_ssa_name (lvectype
);
7886 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7887 lvectype
, vec_oprnd
));
7888 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7891 for (i
= 0; i
< nstores
; i
++)
7893 tree newref
, newoff
;
7894 gimple
*incr
, *assign
;
7895 tree size
= TYPE_SIZE (ltype
);
7896 /* Extract the i'th component. */
7897 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7898 bitsize_int (i
), size
);
7899 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7902 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7906 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7908 newref
= build2 (MEM_REF
, ltype
,
7909 running_off
, this_off
);
7910 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7912 /* And store it to *running_off. */
7913 assign
= gimple_build_assign (newref
, elem
);
7914 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7918 || group_el
== group_size
)
7920 newoff
= copy_ssa_name (running_off
, NULL
);
7921 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7922 running_off
, stride_step
);
7923 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7925 running_off
= newoff
;
7928 if (g
== group_size
- 1
7931 if (j
== 0 && i
== 0)
7933 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7937 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7938 vec_oprnds
.release ();
7946 auto_vec
<tree
> dr_chain (group_size
);
7947 oprnds
.create (group_size
);
7949 gcc_assert (alignment_support_scheme
);
7950 vec_loop_masks
*loop_masks
7951 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7952 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7954 vec_loop_lens
*loop_lens
7955 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
7956 ? &LOOP_VINFO_LENS (loop_vinfo
)
7959 /* Shouldn't go with length-based approach if fully masked. */
7960 gcc_assert (!loop_lens
|| !loop_masks
);
7962 /* Targets with store-lane instructions must not require explicit
7963 realignment. vect_supportable_dr_alignment always returns either
7964 dr_aligned or dr_unaligned_supported for masked operations. */
7965 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7968 || alignment_support_scheme
== dr_aligned
7969 || alignment_support_scheme
== dr_unaligned_supported
);
7971 tree offset
= NULL_TREE
;
7972 if (!known_eq (poffset
, 0))
7973 offset
= size_int (poffset
);
7976 tree vec_offset
= NULL_TREE
;
7977 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7979 aggr_type
= NULL_TREE
;
7982 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7984 aggr_type
= elem_type
;
7985 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7986 &bump
, &vec_offset
);
7990 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7991 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7993 aggr_type
= vectype
;
7994 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
7995 memory_access_type
);
7999 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
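  /* A hypothetical illustration of the above for a group of two stores
     (a[2*i] = x; a[2*i+1] = y;) and V4SI vectors: the two vectorized rhs
     vectors vx = {x0,x1,x2,x3} and vy = {y0,y1,y2,y3} are interleaved by
     vect_permute_store_chain into

	vz0 = VEC_PERM_EXPR < vx, vy, {0, 4, 1, 5} >
	vz1 = VEC_PERM_EXPR < vx, vy, {2, 6, 3, 7} >

     and vz0/vz1 are then stored to &a and &a + vec_size*1 respectively.  */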
8039 auto_vec
<tree
> vec_masks
;
8040 tree vec_mask
= NULL
;
8041 auto_vec
<tree
> vec_offsets
;
8042 auto_vec
<vec
<tree
> > gvec_oprnds
;
8043 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8044 for (j
= 0; j
< ncopies
; j
++)
8051 /* Get vectorized arguments for SLP_NODE. */
8052 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8054 vec_oprnd
= vec_oprnds
[0];
8058 /* For interleaved stores we collect vectorized defs for all the
8059 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8060 used as an input to vect_permute_store_chain().
8062 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8063 and OPRNDS are of size 1. */
8064 stmt_vec_info next_stmt_info
= first_stmt_info
;
8065 for (i
= 0; i
< group_size
; i
++)
8067 /* Since gaps are not supported for interleaved stores,
8068 DR_GROUP_SIZE is the exact number of stmts in the chain.
8069 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8070 that there is no interleaving, DR_GROUP_SIZE is 1,
8071 and only one iteration of the loop will be executed. */
8072 op
= vect_get_store_rhs (next_stmt_info
);
8073 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8074 ncopies
, op
, &gvec_oprnds
[i
]);
8075 vec_oprnd
= gvec_oprnds
[i
][0];
8076 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8077 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8078 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8082 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8083 mask
, &vec_masks
, mask_vectype
);
8084 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
8091 bool simd_lane_access_p
8092 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8093 if (simd_lane_access_p
8095 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8096 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8097 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8098 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8099 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8100 get_alias_set (TREE_TYPE (ref_type
))))
8102 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8103 dataref_offset
= build_int_cst (ref_type
, 0);
8105 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8107 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8108 &gs_info
, &dataref_ptr
,
8110 vec_offset
= vec_offsets
[0];
8114 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8115 simd_lane_access_p
? loop
: NULL
,
8116 offset
, &dummy
, gsi
, &ptr_incr
,
8117 simd_lane_access_p
, bump
);
8121 /* For interleaved stores we created vectorized defs for all the
8122 defs stored in OPRNDS in the previous iteration (previous copy).
8123 DR_CHAIN is then used as an input to vect_permute_store_chain().
8124 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8125 OPRNDS are of size 1. */
8126 for (i
= 0; i
< group_size
; i
++)
8128 vec_oprnd
= gvec_oprnds
[i
][j
];
8129 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8130 oprnds
[i
] = gvec_oprnds
[i
][j
];
8133 vec_mask
= vec_masks
[j
];
8136 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8137 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8138 vec_offset
= vec_offsets
[j
];
8140 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8144 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8148 /* Get an array into which we can store the individual vectors. */
8149 vec_array
= create_vector_array (vectype
, vec_num
);
8151 /* Invalidate the current contents of VEC_ARRAY. This should
8152 become an RTL clobber too, which prevents the vector registers
8153 from being upward-exposed. */
8154 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8156 /* Store the individual vectors into the array. */
8157 for (i
= 0; i
< vec_num
; i
++)
8159 vec_oprnd
= dr_chain
[i
];
8160 write_vector_array (vinfo
, stmt_info
,
8161 gsi
, vec_oprnd
, vec_array
, i
);
8164 tree final_mask
= NULL
;
8166 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8169 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8176 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8178 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8179 tree alias_ptr
= build_int_cst (ref_type
, align
);
8180 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8181 dataref_ptr
, alias_ptr
,
8182 final_mask
, vec_array
);
8187 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8188 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8189 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8191 gimple_call_set_lhs (call
, data_ref
);
8193 gimple_call_set_nothrow (call
, true);
8194 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8197 /* Record that VEC_ARRAY is now dead. */
8198 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8206 result_chain
.create (group_size
);
8208 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8209 gsi
, &result_chain
);
8212 stmt_vec_info next_stmt_info
= first_stmt_info
;
8213 for (i
= 0; i
< vec_num
; i
++)
8216 unsigned HOST_WIDE_INT align
;
8218 tree final_mask
= NULL_TREE
;
8220 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8222 vectype
, vec_num
* j
+ i
);
8224 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8227 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8229 tree scale
= size_int (gs_info
.scale
);
8232 call
= gimple_build_call_internal
8233 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8234 scale
, vec_oprnd
, final_mask
);
8236 call
= gimple_build_call_internal
8237 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8239 gimple_call_set_nothrow (call
, true);
8240 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8246 /* Bump the vector pointer. */
8247 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8248 gsi
, stmt_info
, bump
);
8251 vec_oprnd
= vec_oprnds
[i
];
8252 else if (grouped_store
)
8253 /* For grouped stores vectorized defs are interleaved in
8254 vect_permute_store_chain(). */
8255 vec_oprnd
= result_chain
[i
];
8257 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8258 if (alignment_support_scheme
== dr_aligned
)
8260 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8262 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8266 misalign
= misalignment
;
8267 if (dataref_offset
== NULL_TREE
8268 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8269 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8271 align
= least_bit_hwi (misalign
| align
);
8273 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8275 tree perm_mask
= perm_mask_for_reverse (vectype
);
8276 tree perm_dest
= vect_create_destination_var
8277 (vect_get_store_rhs (stmt_info
), vectype
);
8278 tree new_temp
= make_ssa_name (perm_dest
);
8280 /* Generate the permute statement. */
8282 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8283 vec_oprnd
, perm_mask
);
8284 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8286 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8287 vec_oprnd
= new_temp
;
8290 /* Arguments are ready. Create the new vector stmt. */
8293 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8295 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8297 final_mask
, vec_oprnd
);
8298 gimple_call_set_nothrow (call
, true);
8299 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8305 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8306 vec_num
* ncopies
, vec_num
* j
+ i
);
8307 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8308 machine_mode vmode
= TYPE_MODE (vectype
);
8309 opt_machine_mode new_ovmode
8310 = get_len_load_store_mode (vmode
, false);
8311 machine_mode new_vmode
= new_ovmode
.require ();
8312 /* Need conversion if it's wrapped with VnQI. */
8313 if (vmode
!= new_vmode
)
8316 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8319 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8321 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8323 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8325 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8330 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8331 ptr
, final_len
, vec_oprnd
);
8332 gimple_call_set_nothrow (call
, true);
8333 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8338 data_ref
= fold_build2 (MEM_REF
, vectype
,
8342 : build_int_cst (ref_type
, 0));
8343 if (alignment_support_scheme
== dr_aligned
)
8346 TREE_TYPE (data_ref
)
8347 = build_aligned_type (TREE_TYPE (data_ref
),
8348 align
* BITS_PER_UNIT
);
8349 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8350 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8351 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8357 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8358 if (!next_stmt_info
)
8365 *vec_stmt
= new_stmt
;
8366 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8370 for (i
= 0; i
< group_size
; ++i
)
8372 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8376 result_chain
.release ();
8377 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
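/* A minimal usage sketch (hypothetical, modelled on the reverse-permute
   helpers elsewhere in this file): building a mask that reverses a vector
   with a compile-time constant number of lanes NUNITS, on the assumption
   that the target has already been checked with can_vec_perm_const_p:

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned int i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK can then be used as the selector operand of a VEC_PERM_EXPR.  */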
/* Given a vector variable X and Y, that was generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
/* vectorizable_load.

   Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
8507 tree vec_dest
= NULL
;
8508 tree data_ref
= NULL
;
8509 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8510 class loop
*loop
= NULL
;
8511 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8512 bool nested_in_vect_loop
= false;
8517 tree dataref_ptr
= NULL_TREE
;
8518 tree dataref_offset
= NULL_TREE
;
8519 gimple
*ptr_incr
= NULL
;
8522 unsigned int group_size
;
8523 poly_uint64 group_gap_adj
;
8524 tree msq
= NULL_TREE
, lsq
;
8525 tree realignment_token
= NULL_TREE
;
8527 vec
<tree
> dr_chain
= vNULL
;
8528 bool grouped_load
= false;
8529 stmt_vec_info first_stmt_info
;
8530 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8531 bool compute_in_loop
= false;
8532 class loop
*at_loop
;
8534 bool slp
= (slp_node
!= NULL
);
8535 bool slp_perm
= false;
8536 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8539 gather_scatter_info gs_info
;
8541 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8543 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8546 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8550 if (!STMT_VINFO_DATA_REF (stmt_info
))
8553 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8554 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8556 scalar_dest
= gimple_assign_lhs (assign
);
8557 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8560 tree_code code
= gimple_assign_rhs_code (assign
);
8561 if (code
!= ARRAY_REF
8562 && code
!= BIT_FIELD_REF
8563 && code
!= INDIRECT_REF
8564 && code
!= COMPONENT_REF
8565 && code
!= IMAGPART_EXPR
8566 && code
!= REALPART_EXPR
8568 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8573 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8574 if (!call
|| !gimple_call_internal_p (call
))
8577 internal_fn ifn
= gimple_call_internal_fn (call
);
8578 if (!internal_load_fn_p (ifn
))
8581 scalar_dest
= gimple_call_lhs (call
);
8585 int mask_index
= internal_fn_mask_index (ifn
);
8587 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
,
8588 /* ??? For SLP we only have operands for
8589 the mask operand. */
8590 slp_node
? 0 : mask_index
,
8591 &mask
, NULL
, &mask_dt
, &mask_vectype
))
8595 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8596 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8600 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8601 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8602 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8607 /* Multiple types in SLP are handled by creating the appropriate number of
8608 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8613 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8615 gcc_assert (ncopies
>= 1);
8617 /* FORNOW. This restriction should be relaxed. */
8618 if (nested_in_vect_loop
&& ncopies
> 1)
8620 if (dump_enabled_p ())
8621 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8622 "multiple types in nested loop.\n");
8626 /* Invalidate assumptions made by dependence analysis when vectorization
8627 on the unrolled body effectively re-orders stmts. */
8629 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8630 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8631 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8633 if (dump_enabled_p ())
8634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8635 "cannot perform implicit CSE when unrolling "
8636 "with negative dependence distance\n");
8640 elem_type
= TREE_TYPE (vectype
);
8641 mode
= TYPE_MODE (vectype
);
8643 /* FORNOW. In some cases can vectorize even if data-type not supported
8644 (e.g. - data copies). */
8645 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8647 if (dump_enabled_p ())
8648 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8649 "Aligned load, but unsupported type.\n");
8653 /* Check if the load is a part of an interleaving chain. */
8654 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8656 grouped_load
= true;
8658 gcc_assert (!nested_in_vect_loop
);
8659 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8661 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8662 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8664 /* Refuse non-SLP vectorization of SLP-only groups. */
8665 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8667 if (dump_enabled_p ())
8668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8669 "cannot vectorize load in non-SLP mode.\n");
8673 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8679 /* In BB vectorization we may not actually use a loaded vector
8680 accessing elements in excess of DR_GROUP_SIZE. */
8681 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8682 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8683 unsigned HOST_WIDE_INT nunits
;
8684 unsigned j
, k
, maxk
= 0;
8685 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8688 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8689 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8690 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8692 if (dump_enabled_p ())
8693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8694 "BB vectorization with gaps at the end of "
8695 "a load is not supported\n");
8702 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8705 if (dump_enabled_p ())
8706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8708 "unsupported load permutation\n");
8713 /* Invalidate assumptions made by dependence analysis when vectorization
8714 on the unrolled body effectively re-orders stmts. */
8715 if (!PURE_SLP_STMT (stmt_info
)
8716 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8717 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8718 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8722 "cannot perform implicit CSE when performing "
8723 "group loads with negative dependence distance\n");
8730 vect_memory_access_type memory_access_type
;
8731 enum dr_alignment_support alignment_support_scheme
;
8734 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8735 ncopies
, &memory_access_type
, &poffset
,
8736 &alignment_support_scheme
, &misalignment
, &gs_info
))
8741 if (memory_access_type
== VMAT_CONTIGUOUS
)
8743 machine_mode vec_mode
= TYPE_MODE (vectype
);
8744 if (!VECTOR_MODE_P (vec_mode
)
8745 || !can_vec_mask_load_store_p (vec_mode
,
8746 TYPE_MODE (mask_vectype
), true))
8749 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8750 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8754 "unsupported access type for masked load.\n");
8757 else if (memory_access_type
== VMAT_GATHER_SCATTER
8758 && gs_info
.ifn
== IFN_LAST
8761 if (dump_enabled_p ())
8762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8763 "unsupported masked emulated gather.\n");
8768 if (!vec_stmt
) /* transformation not required. */
8772 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8775 if (dump_enabled_p ())
8776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8777 "incompatible vector types for invariants\n");
8782 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8785 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8786 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8787 group_size
, memory_access_type
,
8790 if (dump_enabled_p ()
8791 && memory_access_type
!= VMAT_ELEMENTWISE
8792 && memory_access_type
!= VMAT_GATHER_SCATTER
8793 && alignment_support_scheme
!= dr_aligned
)
8794 dump_printf_loc (MSG_NOTE
, vect_location
,
8795 "Vectorizing an unaligned access.\n");
8797 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8798 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8799 alignment_support_scheme
, misalignment
,
8800 &gs_info
, slp_node
, cost_vec
);
8805 gcc_assert (memory_access_type
8806 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8808 if (dump_enabled_p ())
8809 dump_printf_loc (MSG_NOTE
, vect_location
,
8810 "transform load. ncopies = %d\n", ncopies
);
8814 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8815 ensure_base_align (dr_info
);
8817 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8819 vect_build_gather_load_calls (vinfo
,
8820 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8824 if (memory_access_type
== VMAT_INVARIANT
)
8826 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8827 /* If we have versioned for aliasing or the loop doesn't
8828 have any data dependencies that would preclude this,
8829 then we are sure this is a loop invariant load and
8830 thus we can insert it on the preheader edge. */
8831 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8832 && !nested_in_vect_loop
8833 && hoist_defs_of_uses (stmt_info
, loop
));
8836 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8837 if (dump_enabled_p ())
8838 dump_printf_loc (MSG_NOTE
, vect_location
,
8839 "hoisting out of the vectorized loop: %G", stmt
);
8840 scalar_dest
= copy_ssa_name (scalar_dest
);
8841 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8842 gsi_insert_on_edge_immediate
8843 (loop_preheader_edge (loop
),
8844 gimple_build_assign (scalar_dest
, rhs
));
8846 /* These copies are all equivalent, but currently the representation
8847 requires a separate STMT_VINFO_VEC_STMT for each one. */
8848 gimple_stmt_iterator gsi2
= *gsi
;
8850 for (j
= 0; j
< ncopies
; j
++)
8853 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8856 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8858 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8860 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8864 *vec_stmt
= new_stmt
;
8865 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8871 if (memory_access_type
== VMAT_ELEMENTWISE
8872 || memory_access_type
== VMAT_STRIDED_SLP
)
8874 gimple_stmt_iterator incr_gsi
;
8879 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8880 tree stride_base
, stride_step
, alias_off
;
8881 /* Checked by get_load_store_type. */
8882 unsigned int const_nunits
= nunits
.to_constant ();
8883 unsigned HOST_WIDE_INT cst_offset
= 0;
8886 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8887 gcc_assert (!nested_in_vect_loop
);
8891 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8892 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8896 first_stmt_info
= stmt_info
;
8897 first_dr_info
= dr_info
;
8899 if (slp
&& grouped_load
)
8901 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8902 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8908 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8909 * vect_get_place_in_interleaving_chain (stmt_info
,
8912 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8915 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8917 = fold_build_pointer_plus
8918 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8919 size_binop (PLUS_EXPR
,
8920 convert_to_ptrofftype (dr_offset
),
8921 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8922 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
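      /* A hypothetical illustration of the scheme above (example values,
	 not from the original comment): with a V4SI vectype and a scalar
	 stride of 3 ints, one vector copy is assembled from four element
	 loads

	   tmp0 = array[j + 0*3];
	   tmp1 = array[j + 1*3];
	   tmp2 = array[j + 2*3];
	   tmp3 = array[j + 3*3];
	   vectemp = {tmp0, tmp1, tmp2, tmp3};

	 i.e. the loop below emits scalar MEM_REF loads and combines them
	 with a CONSTRUCTOR, bumping the running offset by stride_step
	 between elements.  */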
8940 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8941 build_int_cst (TREE_TYPE (stride_step
), vf
));
8943 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8945 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8946 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8947 create_iv (stride_base
, ivstep
, NULL
,
8948 loop
, &incr_gsi
, insert_after
,
8951 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8953 running_off
= offvar
;
8954 alias_off
= build_int_cst (ref_type
, 0);
8955 int nloads
= const_nunits
;
8957 tree ltype
= TREE_TYPE (vectype
);
8958 tree lvectype
= vectype
;
8959 auto_vec
<tree
> dr_chain
;
8960 if (memory_access_type
== VMAT_STRIDED_SLP
)
8962 if (group_size
< const_nunits
)
8964 /* First check if vec_init optab supports construction from vector
8965 elts directly. Otherwise avoid emitting a constructor of
8966 vector elements by performing the loads using an integer type
8967 of the same size, constructing a vector of those and then
8968 re-interpreting it as the original vector type. This avoids a
8969 huge runtime penalty due to the general inability to perform
8970 store forwarding from smaller stores to a larger load. */
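	      /* A hypothetical illustration of the fallback described above
		 (modes chosen for the example only): for a group of two
		 SImode loads and a V4SI vectype, instead of four SImode
		 loads feeding a CONSTRUCTOR we try two DImode loads

		   tmp0 = MEM <long long> [ptr];
		   tmp1 = MEM <long long> [ptr + stride];
		   tmp  = {tmp0, tmp1};                  // V2DI constructor
		   vec  = VIEW_CONVERT_EXPR<V4SI>(tmp);

		 so each load is as wide as a whole group, avoiding the
		 store-forwarding penalty; vector_vector_composition_type
		 picks the composition type used for TMP.  */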
8973 = vector_vector_composition_type (vectype
,
8974 const_nunits
/ group_size
,
8976 if (vtype
!= NULL_TREE
)
8978 nloads
= const_nunits
/ group_size
;
8987 lnel
= const_nunits
;
8990 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8992 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8993 else if (nloads
== 1)
8998 /* For SLP permutation support we need to load the whole group,
8999 not only the number of vector stmts the permutation result
9003 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9005 unsigned int const_vf
= vf
.to_constant ();
9006 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9007 dr_chain
.create (ncopies
);
9010 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9012 unsigned int group_el
= 0;
9013 unsigned HOST_WIDE_INT
9014 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9015 for (j
= 0; j
< ncopies
; j
++)
9018 vec_alloc (v
, nloads
);
9019 gimple
*new_stmt
= NULL
;
9020 for (i
= 0; i
< nloads
; i
++)
9022 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9023 group_el
* elsz
+ cst_offset
);
9024 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9025 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9026 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9027 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9029 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9030 gimple_assign_lhs (new_stmt
));
9034 || group_el
== group_size
)
9036 tree newoff
= copy_ssa_name (running_off
);
9037 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9038 running_off
, stride_step
);
9039 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9041 running_off
= newoff
;
9047 tree vec_inv
= build_constructor (lvectype
, v
);
9048 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9049 vec_inv
, lvectype
, gsi
);
9050 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9051 if (lvectype
!= vectype
)
9053 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9055 build1 (VIEW_CONVERT_EXPR
,
9056 vectype
, new_temp
));
9057 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9064 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9066 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9071 *vec_stmt
= new_stmt
;
9072 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9078 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9084 if (memory_access_type
== VMAT_GATHER_SCATTER
9085 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9086 grouped_load
= false;
9090 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9091 group_size
= DR_GROUP_SIZE (first_stmt_info
);
9092 /* For SLP vectorization we directly vectorize a subchain
9093 without permutation. */
9094 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9095 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
9096 /* For BB vectorization always use the first stmt to base
9097 the data ref pointer on. */
9099 first_stmt_info_for_drptr
9100 = vect_find_first_scalar_stmt_in_slp (slp_node
);
9102 /* Check if the chain of loads is already vectorized. */
9103 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
9104 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9105 ??? But we can only do so if there is exactly one
9106 as we have no way to get at the rest. Leave the CSE
9108 ??? With the group load eventually participating
9109 in multiple different permutations (having multiple
9110 slp nodes which refer to the same group) the CSE
9111 is even wrong code. See PR56270. */
9114 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9117 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9120 /* VEC_NUM is the number of vect stmts to be created for this group. */
9123 grouped_load
= false;
9124 /* If an SLP permutation is from N elements to N elements,
9125 and if one vector holds a whole number of N, we can load
9126 the inputs to the permutation in the same way as an
9127 unpermuted sequence. In other cases we need to load the
9128 whole group, not only the number of vector stmts the
9129 permutation result fits in. */
9130 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9132 && (group_size
!= scalar_lanes
9133 || !multiple_p (nunits
, group_size
)))
9135 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9136 variable VF; see vect_transform_slp_perm_load. */
9137 unsigned int const_vf
= vf
.to_constant ();
9138 unsigned int const_nunits
= nunits
.to_constant ();
9139 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9140 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9144 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9146 = group_size
- scalar_lanes
;
9150 vec_num
= group_size
;
9152 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9156 first_stmt_info
= stmt_info
;
9157 first_dr_info
= dr_info
;
9158 group_size
= vec_num
= 1;
9160 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9163 gcc_assert (alignment_support_scheme
);
9164 vec_loop_masks
*loop_masks
9165 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9166 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9168 vec_loop_lens
*loop_lens
9169 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9170 ? &LOOP_VINFO_LENS (loop_vinfo
)
9173 /* Shouldn't go with length-based approach if fully masked. */
9174 gcc_assert (!loop_lens
|| !loop_masks
);
9176 /* Targets with store-lane instructions must not require explicit
9177 realignment. vect_supportable_dr_alignment always returns either
9178 dr_aligned or dr_unaligned_supported for masked operations. */
9179 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9182 || alignment_support_scheme
== dr_aligned
9183 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
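  /* A hypothetical illustration of the above for a group of two loads
     (x = a[2*i]; y = a[2*i+1];) and V4SI vectors: the two contiguous vector
     loads vx0 = {x0,y0,x1,y1} and vx1 = {x2,y2,x3,y3} are de-interleaved by
     vect_permute_load_chain into

	vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >   (the x values)
	vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >   (the y values)

     matching the even/odd selectors shown above.  */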
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */
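  /* As a hypothetical illustration of the explicit-realignment scheme above
     (sizes chosen for the example only): with 16-byte vectors and P only
     4-byte aligned, each iteration combines the two aligned loads that
     straddle the wanted 16 bytes,

	msq = *floor(p);                    // aligned load, low part
	lsq = *(floor(p) + VS - 1);         // aligned load, high part
	vec_dest = realign_load (msq, lsq, realignment_token);

     and in the optimized variant the next iteration reuses this iteration's
     lsq as its msq, so only one new aligned load is issued per iteration.  */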
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
9285 if (nested_in_vect_loop
9286 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9287 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9289 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9290 compute_in_loop
= true;
9293 bool diff_first_stmt_info
9294 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9296 tree offset
= NULL_TREE
;
9297 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9298 || alignment_support_scheme
== dr_explicit_realign
)
9299 && !compute_in_loop
)
9301 /* If we have different first_stmt_info, we can't set up realignment
9302 here, since we can't guarantee first_stmt_info DR has been
9303 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9304 distance from first_stmt_info DR instead as below. */
9305 if (!diff_first_stmt_info
)
9306 msq
= vect_setup_realignment (vinfo
,
9307 first_stmt_info
, gsi
, &realignment_token
,
9308 alignment_support_scheme
, NULL_TREE
,
9310 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9312 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9313 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9315 gcc_assert (!first_stmt_info_for_drptr
);
9321 if (!known_eq (poffset
, 0))
9323 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
9324 : size_int (poffset
));
9327 tree vec_offset
= NULL_TREE
;
9328 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9330 aggr_type
= NULL_TREE
;
9333 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9335 aggr_type
= elem_type
;
9336 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9337 &bump
, &vec_offset
);
9341 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9342 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9344 aggr_type
= vectype
;
9345 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9346 memory_access_type
);
9349 vec
<tree
> vec_offsets
= vNULL
;
9350 auto_vec
<tree
> vec_masks
;
9352 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9353 mask
, &vec_masks
, mask_vectype
, NULL_TREE
);
9354 tree vec_mask
= NULL_TREE
;
9355 poly_uint64 group_elt
= 0;
9356 for (j
= 0; j
< ncopies
; j
++)
9358 /* 1. Create the vector or array pointer update chain. */
9361 bool simd_lane_access_p
9362 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9363 if (simd_lane_access_p
9364 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9365 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9366 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9367 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9368 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9369 get_alias_set (TREE_TYPE (ref_type
)))
9370 && (alignment_support_scheme
== dr_aligned
9371 || alignment_support_scheme
== dr_unaligned_supported
))
9373 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9374 dataref_offset
= build_int_cst (ref_type
, 0);
9376 else if (diff_first_stmt_info
)
9379 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9380 aggr_type
, at_loop
, offset
, &dummy
,
9381 gsi
, &ptr_incr
, simd_lane_access_p
,
9383 /* Adjust the pointer by the difference to first_stmt. */
9384 data_reference_p ptrdr
9385 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9387 = fold_convert (sizetype
,
9388 size_binop (MINUS_EXPR
,
9389 DR_INIT (first_dr_info
->dr
),
9391 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9393 if (alignment_support_scheme
== dr_explicit_realign
)
9395 msq
= vect_setup_realignment (vinfo
,
9396 first_stmt_info_for_drptr
, gsi
,
9398 alignment_support_scheme
,
9399 dataref_ptr
, &at_loop
);
9400 gcc_assert (!compute_in_loop
);
9403 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9405 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9406 &gs_info
, &dataref_ptr
,
9411 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9413 offset
, &dummy
, gsi
, &ptr_incr
,
9414 simd_lane_access_p
, bump
);
9416 vec_mask
= vec_masks
[0];
9421 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9423 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9424 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9427 vec_mask
= vec_masks
[j
];
9430 if (grouped_load
|| slp_perm
)
9431 dr_chain
.create (vec_num
);
9433 gimple
*new_stmt
= NULL
;
9434 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9438 vec_array
= create_vector_array (vectype
, vec_num
);
9440 tree final_mask
= NULL_TREE
;
9442 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9445 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9452 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9454 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9455 tree alias_ptr
= build_int_cst (ref_type
, align
);
9456 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9457 dataref_ptr
, alias_ptr
,
9463 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9464 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9465 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9467 gimple_call_set_lhs (call
, vec_array
);
9468 gimple_call_set_nothrow (call
, true);
9469 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9472 /* Extract each vector into an SSA_NAME. */
9473 for (i
= 0; i
< vec_num
; i
++)
9475 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9477 dr_chain
.quick_push (new_temp
);
9480 /* Record the mapping between SSA_NAMEs and statements. */
9481 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9483 /* Record that VEC_ARRAY is now dead. */
9484 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9488 for (i
= 0; i
< vec_num
; i
++)
9490 tree final_mask
= NULL_TREE
;
9492 && memory_access_type
!= VMAT_INVARIANT
)
9493 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9495 vectype
, vec_num
* j
+ i
);
9497 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9501 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9502 gsi
, stmt_info
, bump
);
9504 /* 2. Create the vector-load in the loop. */
9505 switch (alignment_support_scheme
)
9508 case dr_unaligned_supported
:
9510 unsigned int misalign
;
9511 unsigned HOST_WIDE_INT align
;
9513 if (memory_access_type
== VMAT_GATHER_SCATTER
9514 && gs_info
.ifn
!= IFN_LAST
)
9516 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9517 vec_offset
= vec_offsets
[j
];
9518 tree zero
= build_zero_cst (vectype
);
9519 tree scale
= size_int (gs_info
.scale
);
9522 call
= gimple_build_call_internal
9523 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9524 vec_offset
, scale
, zero
, final_mask
);
9526 call
= gimple_build_call_internal
9527 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9528 vec_offset
, scale
, zero
);
9529 gimple_call_set_nothrow (call
, true);
9531 data_ref
= NULL_TREE
;
9534 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9536 /* Emulated gather-scatter. */
9537 gcc_assert (!final_mask
);
9538 unsigned HOST_WIDE_INT const_nunits
9539 = nunits
.to_constant ();
9540 unsigned HOST_WIDE_INT const_offset_nunits
9541 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9543 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9544 vec_alloc (ctor_elts
, const_nunits
);
9545 gimple_seq stmts
= NULL
;
9546 /* We support offset vectors with more elements
9547 than the data vector for now. */
9548 unsigned HOST_WIDE_INT factor
9549 = const_offset_nunits
/ const_nunits
;
9550 vec_offset
= vec_offsets
[j
/ factor
];
9551 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9552 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9553 tree scale
= size_int (gs_info
.scale
);
9555 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9556 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9558 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9560 tree boff
= size_binop (MULT_EXPR
,
9561 TYPE_SIZE (idx_type
),
9564 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9565 idx_type
, vec_offset
,
9566 TYPE_SIZE (idx_type
),
9568 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9569 idx
= gimple_build (&stmts
, MULT_EXPR
,
9570 sizetype
, idx
, scale
);
9571 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9572 TREE_TYPE (dataref_ptr
),
9574 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9575 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9576 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9577 build_int_cst (ref_type
, 0));
9578 new_stmt
= gimple_build_assign (elt
, ref
);
9579 gimple_seq_add_stmt (&stmts
, new_stmt
);
9580 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9582 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9583 new_stmt
= gimple_build_assign (NULL_TREE
,
9585 (vectype
, ctor_elts
));
9586 data_ref
= NULL_TREE
;
9591 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9592 if (alignment_support_scheme
== dr_aligned
)
9594 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9596 align
= dr_alignment
9597 (vect_dr_behavior (vinfo
, first_dr_info
));
9601 misalign
= misalignment
;
9602 if (dataref_offset
== NULL_TREE
9603 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9604 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9606 align
= least_bit_hwi (misalign
| align
);
9610 tree ptr
= build_int_cst (ref_type
,
9611 align
* BITS_PER_UNIT
);
9613 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9616 gimple_call_set_nothrow (call
, true);
9618 data_ref
= NULL_TREE
;
9620 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9623 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9626 tree ptr
= build_int_cst (ref_type
,
9627 align
* BITS_PER_UNIT
);
9629 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9632 gimple_call_set_nothrow (call
, true);
9634 data_ref
= NULL_TREE
;
9636 /* Need conversion if it's wrapped with VnQI. */
9637 machine_mode vmode
= TYPE_MODE (vectype
);
9638 opt_machine_mode new_ovmode
9639 = get_len_load_store_mode (vmode
, true);
9640 machine_mode new_vmode
= new_ovmode
.require ();
9641 if (vmode
!= new_vmode
)
9643 tree qi_type
= unsigned_intQI_type_node
;
9645 = build_vector_type_for_mode (qi_type
, new_vmode
);
9646 tree var
= vect_get_new_ssa_name (new_vtype
,
9648 gimple_set_lhs (call
, var
);
9649 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9651 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9653 = gimple_build_assign (vec_dest
,
9654 VIEW_CONVERT_EXPR
, op
);
9659 tree ltype
= vectype
;
9660 tree new_vtype
= NULL_TREE
;
9661 unsigned HOST_WIDE_INT gap
9662 = DR_GROUP_GAP (first_stmt_info
);
9663 unsigned int vect_align
9664 = vect_known_alignment_in_bytes (first_dr_info
,
9666 unsigned int scalar_dr_size
9667 = vect_get_scalar_dr_size (first_dr_info
);
9668 /* If there's no peeling for gaps but we have a gap
9669 with slp loads then load the lower half of the
9670 vector only. See get_group_load_store_type for
9671 when we apply this optimization. */
9674 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9676 && known_eq (nunits
, (group_size
- gap
) * 2)
9677 && known_eq (nunits
, group_size
)
9678 && gap
>= (vect_align
/ scalar_dr_size
))
9682 = vector_vector_composition_type (vectype
, 2,
9684 if (new_vtype
!= NULL_TREE
)
9688 = (dataref_offset
? dataref_offset
9689 : build_int_cst (ref_type
, 0));
9690 if (ltype
!= vectype
9691 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9693 unsigned HOST_WIDE_INT gap_offset
9694 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9695 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9696 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9699 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9700 if (alignment_support_scheme
== dr_aligned
)
9703 TREE_TYPE (data_ref
)
9704 = build_aligned_type (TREE_TYPE (data_ref
),
9705 align
* BITS_PER_UNIT
);
9706 if (ltype
!= vectype
)
9708 vect_copy_ref_info (data_ref
,
9709 DR_REF (first_dr_info
->dr
));
9710 tree tem
= make_ssa_name (ltype
);
9711 new_stmt
= gimple_build_assign (tem
, data_ref
);
9712 vect_finish_stmt_generation (vinfo
, stmt_info
,
9715 vec
<constructor_elt
, va_gc
> *v
;
9717 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9719 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9720 build_zero_cst (ltype
));
9721 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9725 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9726 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9727 build_zero_cst (ltype
));
9729 gcc_assert (new_vtype
!= NULL_TREE
);
9730 if (new_vtype
== vectype
)
9731 new_stmt
= gimple_build_assign (
9732 vec_dest
, build_constructor (vectype
, v
));
9735 tree new_vname
= make_ssa_name (new_vtype
);
9736 new_stmt
= gimple_build_assign (
9737 new_vname
, build_constructor (new_vtype
, v
));
9738 vect_finish_stmt_generation (vinfo
, stmt_info
,
9740 new_stmt
= gimple_build_assign (
9741 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9748 case dr_explicit_realign
:
9752 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9754 if (compute_in_loop
)
9755 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9757 dr_explicit_realign
,
9760 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9761 ptr
= copy_ssa_name (dataref_ptr
);
9763 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9764 // For explicit realign the target alignment should be
9765 // known at compile time.
9766 unsigned HOST_WIDE_INT align
=
9767 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9768 new_stmt
= gimple_build_assign
9769 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9771 (TREE_TYPE (dataref_ptr
),
9772 -(HOST_WIDE_INT
) align
));
9773 vect_finish_stmt_generation (vinfo
, stmt_info
,
9776 = build2 (MEM_REF
, vectype
, ptr
,
9777 build_int_cst (ref_type
, 0));
9778 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9779 vec_dest
= vect_create_destination_var (scalar_dest
,
9781 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9782 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9783 gimple_assign_set_lhs (new_stmt
, new_temp
);
9784 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9785 vect_finish_stmt_generation (vinfo
, stmt_info
,
9789 bump
= size_binop (MULT_EXPR
, vs
,
9790 TYPE_SIZE_UNIT (elem_type
));
9791 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9792 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9794 new_stmt
= gimple_build_assign
9795 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9797 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9798 ptr
= copy_ssa_name (ptr
, new_stmt
);
9799 gimple_assign_set_lhs (new_stmt
, ptr
);
9800 vect_finish_stmt_generation (vinfo
, stmt_info
,
9803 = build2 (MEM_REF
, vectype
, ptr
,
9804 build_int_cst (ref_type
, 0));
9807 case dr_explicit_realign_optimized
:
9809 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9810 new_temp
= copy_ssa_name (dataref_ptr
);
9812 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9813 // We should only be doing this if we know the target
9814 // alignment at compile time.
9815 unsigned HOST_WIDE_INT align
=
9816 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9817 new_stmt
= gimple_build_assign
9818 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9819 build_int_cst (TREE_TYPE (dataref_ptr
),
9820 -(HOST_WIDE_INT
) align
));
9821 vect_finish_stmt_generation (vinfo
, stmt_info
,
9824 = build2 (MEM_REF
, vectype
, new_temp
,
9825 build_int_cst (ref_type
, 0));
9831 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9832 /* DATA_REF is null if we've already built the statement. */
9835 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9836 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9838 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9839 gimple_set_lhs (new_stmt
, new_temp
);
9840 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* 3. Handle explicit realignment if necessary/supported.

     vec_dest = realign_load (msq, lsq, realignment_token)  */
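/* Illustrative sketch (not from the original sources): MSQ holds the
   vector loaded from the aligned address at or below the first scalar
   element, LSQ the next aligned vector; REALIGN_LOAD then extracts the
   nunits elements that straddle the two, steered by the realignment
   token derived from the misaligned pointer.  */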
9845 if (alignment_support_scheme
== dr_explicit_realign_optimized
9846 || alignment_support_scheme
== dr_explicit_realign
)
9848 lsq
= gimple_assign_lhs (new_stmt
);
9849 if (!realignment_token
)
9850 realignment_token
= dataref_ptr
;
9851 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9852 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9853 msq
, lsq
, realignment_token
);
9854 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9855 gimple_assign_set_lhs (new_stmt
, new_temp
);
9856 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9858 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9861 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9862 add_phi_arg (phi
, lsq
,
9863 loop_latch_edge (containing_loop
),
9869 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9871 tree perm_mask
= perm_mask_for_reverse (vectype
);
9872 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9873 perm_mask
, stmt_info
, gsi
);
9874 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
/* Collect vector loads and later create their permutation in
   vect_transform_grouped_load ().  */
9879 if (grouped_load
|| slp_perm
)
9880 dr_chain
.quick_push (new_temp
);
9882 /* Store vector loads in the corresponding SLP_NODE. */
9883 if (slp
&& !slp_perm
)
9884 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* With SLP permutation we load the gaps as well; without it,
   we need to skip the gaps after we manage to fully load
   all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9889 group_elt
+= nunits
;
9890 if (maybe_ne (group_gap_adj
, 0U)
9892 && known_eq (group_elt
, group_size
- group_gap_adj
))
9894 poly_wide_int bump_val
9895 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9897 if (tree_int_cst_sgn
9898 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9899 bump_val
= -bump_val
;
9900 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9901 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9902 gsi
, stmt_info
, bump
);
/* Bump the vector pointer to account for a gap or for excess
   elements loaded for a permuted SLP load.  */
9908 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9910 poly_wide_int bump_val
9911 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9913 if (tree_int_cst_sgn
9914 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9915 bump_val
= -bump_val
;
9916 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9917 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9922 if (slp
&& !slp_perm
)
/* For SLP we know we've seen all possible uses of dr_chain so
   direct vect_transform_slp_perm_load to DCE the unused parts.
   ??? This is a hack to prevent compile-time issues as seen
   in PR101120 and friends.  */
9932 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9933 gsi
, vf
, false, &n_perms
,
9941 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9942 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9944 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9948 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9951 dr_chain
.release ();
9954 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
9973 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9974 slp_tree slp_node
, tree
*comp_vectype
,
9975 enum vect_def_type
*dts
, tree vectype
)
9978 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9982 if (TREE_CODE (cond
) == SSA_NAME
9983 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9985 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9986 &slp_op
, &dts
[0], comp_vectype
)
9988 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9993 if (!COMPARISON_CLASS_P (cond
))
9996 lhs
= TREE_OPERAND (cond
, 0);
9997 rhs
= TREE_OPERAND (cond
, 1);
9999 if (TREE_CODE (lhs
) == SSA_NAME
)
10001 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10002 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10005 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10006 || TREE_CODE (lhs
) == FIXED_CST
)
10007 dts
[0] = vect_constant_def
;
10011 if (TREE_CODE (rhs
) == SSA_NAME
)
10013 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10014 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10017 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10018 || TREE_CODE (rhs
) == FIXED_CST
)
10019 dts
[1] = vect_constant_def
;
10023 if (vectype1
&& vectype2
10024 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10025 TYPE_VECTOR_SUBPARTS (vectype2
)))
10028 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10029 /* Invariant comparison. */
10030 if (! *comp_vectype
)
10032 tree scalar_type
= TREE_TYPE (lhs
);
10033 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10034 *comp_vectype
= truth_type_for (vectype
);
10037 /* If we can widen the comparison to match vectype do so. */
10038 if (INTEGRAL_TYPE_P (scalar_type
)
10040 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10041 TYPE_SIZE (TREE_TYPE (vectype
))))
10042 scalar_type
= build_nonstandard_integer_type
10043 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10044 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
10064 vectorizable_condition (vec_info
*vinfo
,
10065 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10067 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10069 tree scalar_dest
= NULL_TREE
;
10070 tree vec_dest
= NULL_TREE
;
10071 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10072 tree then_clause
, else_clause
;
10073 tree comp_vectype
= NULL_TREE
;
10074 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10075 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10078 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10079 enum vect_def_type dts
[4]
10080 = {vect_unknown_def_type
, vect_unknown_def_type
,
10081 vect_unknown_def_type
, vect_unknown_def_type
};
10085 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10087 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10088 vec
<tree
> vec_oprnds0
= vNULL
;
10089 vec
<tree
> vec_oprnds1
= vNULL
;
10090 vec
<tree
> vec_oprnds2
= vNULL
;
10091 vec
<tree
> vec_oprnds3
= vNULL
;
10093 bool masked
= false;
10095 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
/* Is this a vectorizable conditional operation?  */
10099 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10103 code
= gimple_assign_rhs_code (stmt
);
10104 if (code
!= COND_EXPR
)
10107 stmt_vec_info reduc_info
= NULL
;
10108 int reduc_index
= -1;
10109 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10111 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10114 if (STMT_SLP_TYPE (stmt_info
))
10116 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10117 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10118 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10119 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10120 || reduc_index
!= -1);
10124 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10128 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10129 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10134 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10138 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10142 gcc_assert (ncopies
>= 1);
10143 if (for_reduction
&& ncopies
> 1)
10144 return false; /* FORNOW */
10146 cond_expr
= gimple_assign_rhs1 (stmt
);
10148 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10149 &comp_vectype
, &dts
[0], vectype
)
10153 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10154 slp_tree then_slp_node
, else_slp_node
;
10155 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10156 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10158 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10159 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10162 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10165 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10168 masked
= !COMPARISON_CLASS_P (cond_expr
);
10169 vec_cmp_type
= truth_type_for (comp_vectype
);
10171 if (vec_cmp_type
== NULL_TREE
)
10174 cond_code
= TREE_CODE (cond_expr
);
10177 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10178 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
/* For conditional reductions, the "then" value needs to be the candidate
   value calculated by this iteration while the "else" value needs to be
   the result carried over from previous iterations.  If the COND_EXPR
   is the other way around, we need to swap it.  */
10185 bool must_invert_cmp_result
= false;
10186 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10189 must_invert_cmp_result
= true;
10192 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10193 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10194 if (new_code
== ERROR_MARK
)
10195 must_invert_cmp_result
= true;
10198 cond_code
= new_code
;
10199 /* Make sure we don't accidentally use the old condition. */
10200 cond_expr
= NULL_TREE
;
10203 std::swap (then_clause
, else_clause
);
10206 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
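/* Illustrative mappings (not from the original sources): for mask
   operands, a > b becomes a & ~b, a >= b becomes a | ~b, a < b becomes
   ~a & b, a <= b becomes ~a | b, a == b becomes ~(a ^ b) and a != b
   becomes a ^ b.  */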
10217 bitop1
= BIT_NOT_EXPR
;
10218 bitop2
= BIT_AND_EXPR
;
10221 bitop1
= BIT_NOT_EXPR
;
10222 bitop2
= BIT_IOR_EXPR
;
10225 bitop1
= BIT_NOT_EXPR
;
10226 bitop2
= BIT_AND_EXPR
;
10227 std::swap (cond_expr0
, cond_expr1
);
10230 bitop1
= BIT_NOT_EXPR
;
10231 bitop2
= BIT_IOR_EXPR
;
10232 std::swap (cond_expr0
, cond_expr1
);
10235 bitop1
= BIT_XOR_EXPR
;
10238 bitop1
= BIT_XOR_EXPR
;
10239 bitop2
= BIT_NOT_EXPR
;
10244 cond_code
= SSA_NAME
;
10247 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10248 && reduction_type
== EXTRACT_LAST_REDUCTION
10249 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10251 if (dump_enabled_p ())
10252 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10253 "reduction comparison operation not supported.\n");
10259 if (bitop1
!= NOP_EXPR
)
10261 machine_mode mode
= TYPE_MODE (comp_vectype
);
10264 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10265 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10268 if (bitop2
!= NOP_EXPR
)
10270 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10272 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10277 vect_cost_for_stmt kind
= vector_stmt
;
10278 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10279 /* Count one reduction-like operation per vector. */
10280 kind
= vec_to_scalar
;
10281 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10285 && (!vect_maybe_update_slp_op_vectype
10286 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10288 && !vect_maybe_update_slp_op_vectype
10289 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10290 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10291 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10293 if (dump_enabled_p ())
10294 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10295 "incompatible vector types for invariants\n");
10299 if (loop_vinfo
&& for_reduction
10300 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10302 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10303 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10304 ncopies
* vec_num
, vectype
, NULL
);
10305 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10306 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10308 if (dump_enabled_p ())
10309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10310 "conditional reduction prevents the use"
10311 " of partial vectors.\n");
10312 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10316 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10317 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10325 scalar_dest
= gimple_assign_lhs (stmt
);
10326 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10327 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10329 bool swap_cond_operands
= false;
/* See whether another part of the vectorized code applies a loop
   mask to the condition, or to its inverse.  */
10334 vec_loop_masks
*masks
= NULL
;
10335 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10337 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10338 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10341 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10342 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10343 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10346 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10347 tree_code orig_code
= cond
.code
;
10348 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10349 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10351 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10352 cond_code
= cond
.code
;
10353 swap_cond_operands
= true;
/* Try the inverse of the current mask.  We check if the
   inverse mask is live and if so we generate a negation of
   the current mask such that we still honor NaNs.  */
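/* Illustrative scalar example (not from the original sources):

     for (int i = 0; i < n; ++i)
       x[i] = a[i] < b[i] ? y[i] : z[i];

   the masked load of y[] uses the a < b mask while the load of z[]
   uses its inverse, so only one comparison needs to be emitted and the
   other mask can be obtained by negating it.  */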
10360 cond
.inverted_p
= true;
10361 cond
.code
= orig_code
;
10362 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10364 bitop1
= orig_code
;
10365 bitop2
= BIT_NOT_EXPR
;
10366 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10367 cond_code
= cond
.code
;
10368 swap_cond_operands
= true;
10375 /* Handle cond expr. */
10377 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10378 cond_expr
, &vec_oprnds0
, comp_vectype
,
10379 then_clause
, &vec_oprnds2
, vectype
,
10380 reduction_type
!= EXTRACT_LAST_REDUCTION
10381 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10383 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10384 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10385 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10386 then_clause
, &vec_oprnds2
, vectype
,
10387 reduction_type
!= EXTRACT_LAST_REDUCTION
10388 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10390 /* Arguments are ready. Create the new vector stmt. */
10391 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10393 vec_then_clause
= vec_oprnds2
[i
];
10394 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10395 vec_else_clause
= vec_oprnds3
[i
];
10397 if (swap_cond_operands
)
10398 std::swap (vec_then_clause
, vec_else_clause
);
10401 vec_compare
= vec_cond_lhs
;
10404 vec_cond_rhs
= vec_oprnds1
[i
];
10405 if (bitop1
== NOP_EXPR
)
10407 gimple_seq stmts
= NULL
;
10408 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10409 vec_cond_lhs
, vec_cond_rhs
);
10410 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10414 new_temp
= make_ssa_name (vec_cmp_type
);
10416 if (bitop1
== BIT_NOT_EXPR
)
10417 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10421 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10423 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10424 if (bitop2
== NOP_EXPR
)
10425 vec_compare
= new_temp
;
10426 else if (bitop2
== BIT_NOT_EXPR
)
10428 /* Instead of doing ~x ? y : z do x ? z : y. */
10429 vec_compare
= new_temp
;
10430 std::swap (vec_then_clause
, vec_else_clause
);
10434 vec_compare
= make_ssa_name (vec_cmp_type
);
10436 = gimple_build_assign (vec_compare
, bitop2
,
10437 vec_cond_lhs
, new_temp
);
10438 vect_finish_stmt_generation (vinfo
, stmt_info
,
/* If we decided to apply a loop mask to the result of the vector
   comparison, AND the comparison with the mask now.  Later passes
   should then be able to reuse the AND results between multiple
   vector statements.

   For example:
     for (int i = 0; i < 100; ++i)
       x[i] = y[i] ? z[i] : 10;

   results in the following optimized GIMPLE:

     mask__35.8_43 = vect__4.7_41 != { 0, ... };
     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
                                       vect_iftmp.11_47, { 10, ... }>;

   instead of using masked and unmasked forms of
   vec != { 0, ... } (masked in the MASK_LOAD,
   unmasked in the VEC_COND_EXPR).  */
/* Force vec_compare to be an SSA_NAME rather than a comparison,
   in cases where that's necessary.  */
10469 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10471 if (!is_gimple_val (vec_compare
))
10473 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10474 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10476 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10477 vec_compare
= vec_compare_name
;
10480 if (must_invert_cmp_result
)
10482 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10483 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10486 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10487 vec_compare
= vec_compare_name
;
10493 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10495 tree tmp2
= make_ssa_name (vec_cmp_type
);
10497 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10499 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10500 vec_compare
= tmp2
;
10505 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10507 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10508 tree lhs
= gimple_get_lhs (old_stmt
);
10509 new_stmt
= gimple_build_call_internal
10510 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10512 gimple_call_set_lhs (new_stmt
, lhs
);
10513 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10514 if (old_stmt
== gsi_stmt (*gsi
))
10515 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
/* In this case we're moving the definition to later in the
   block.  That doesn't matter because the only uses of the
   lhs are in phi statements.  */
10521 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10522 gsi_remove (&old_gsi
, true);
10523 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10528 new_temp
= make_ssa_name (vec_dest
);
10529 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10530 vec_then_clause
, vec_else_clause
);
10531 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10534 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10536 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10540 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10542 vec_oprnds0
.release ();
10543 vec_oprnds1
.release ();
10544 vec_oprnds2
.release ();
10545 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
10559 vectorizable_comparison (vec_info
*vinfo
,
10560 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10562 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10564 tree lhs
, rhs1
, rhs2
;
10565 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10566 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10567 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10569 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10570 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10572 poly_uint64 nunits
;
10574 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10576 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10577 vec
<tree
> vec_oprnds0
= vNULL
;
10578 vec
<tree
> vec_oprnds1
= vNULL
;
10582 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10585 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10588 mask_type
= vectype
;
10589 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10594 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10596 gcc_assert (ncopies
>= 1);
10597 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10600 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10604 code
= gimple_assign_rhs_code (stmt
);
10606 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10609 slp_tree slp_rhs1
, slp_rhs2
;
10610 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10611 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10614 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10615 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10618 if (vectype1
&& vectype2
10619 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10620 TYPE_VECTOR_SUBPARTS (vectype2
)))
10623 vectype
= vectype1
? vectype1
: vectype2
;
10625 /* Invariant comparison. */
10628 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10629 vectype
= mask_type
;
10631 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10633 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10636 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10639 /* Can't compare mask and non-mask types. */
10640 if (vectype1
&& vectype2
10641 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
/* Boolean values may have another representation in vectors
   and therefore we prefer bit operations over comparison for
   them (which also works for scalar masks).  We store opcodes
   to use in bitop1 and bitop2.  Statement is vectorized as
   BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
   depending on bitop1 and bitop2 arity.  */
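/* Illustrative example (not from the original sources): on mask
   vectors, m1 > m2 is emitted as tmp = ~m2; res = m1 & tmp (unary
   bitop1, binary bitop2), while m1 == m2 is tmp = m1 ^ m2; res = ~tmp
   (binary bitop1, unary bitop2).  */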
10651 bool swap_p
= false;
10652 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10654 if (code
== GT_EXPR
)
10656 bitop1
= BIT_NOT_EXPR
;
10657 bitop2
= BIT_AND_EXPR
;
10659 else if (code
== GE_EXPR
)
10661 bitop1
= BIT_NOT_EXPR
;
10662 bitop2
= BIT_IOR_EXPR
;
10664 else if (code
== LT_EXPR
)
10666 bitop1
= BIT_NOT_EXPR
;
10667 bitop2
= BIT_AND_EXPR
;
10670 else if (code
== LE_EXPR
)
10672 bitop1
= BIT_NOT_EXPR
;
10673 bitop2
= BIT_IOR_EXPR
;
10678 bitop1
= BIT_XOR_EXPR
;
10679 if (code
== EQ_EXPR
)
10680 bitop2
= BIT_NOT_EXPR
;
10686 if (bitop1
== NOP_EXPR
)
10688 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10693 machine_mode mode
= TYPE_MODE (vectype
);
10696 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10697 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10700 if (bitop2
!= NOP_EXPR
)
10702 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10703 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10708 /* Put types on constant and invariant SLP children. */
10710 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10711 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10713 if (dump_enabled_p ())
10714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10715 "incompatible vector types for invariants\n");
10719 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10720 vect_model_simple_cost (vinfo
, stmt_info
,
10721 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10722 dts
, ndts
, slp_node
, cost_vec
);
10729 lhs
= gimple_assign_lhs (stmt
);
10730 mask
= vect_create_destination_var (lhs
, mask_type
);
10732 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10733 rhs1
, &vec_oprnds0
, vectype
,
10734 rhs2
, &vec_oprnds1
, vectype
);
10736 std::swap (vec_oprnds0
, vec_oprnds1
);
10738 /* Arguments are ready. Create the new vector stmt. */
10739 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10742 vec_rhs2
= vec_oprnds1
[i
];
10744 new_temp
= make_ssa_name (mask
);
10745 if (bitop1
== NOP_EXPR
)
10747 new_stmt
= gimple_build_assign (new_temp
, code
,
10748 vec_rhs1
, vec_rhs2
);
10749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10753 if (bitop1
== BIT_NOT_EXPR
)
10754 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10756 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10758 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10759 if (bitop2
!= NOP_EXPR
)
10761 tree res
= make_ssa_name (mask
);
10762 if (bitop2
== BIT_NOT_EXPR
)
10763 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10765 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10767 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10771 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10773 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10777 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10779 vec_oprnds0
.release ();
10780 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10791 can_vectorize_live_stmts (vec_info
*vinfo
,
10792 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10793 slp_tree slp_node
, slp_instance slp_node_instance
,
10795 stmt_vector_for_cost
*cost_vec
)
10799 stmt_vec_info slp_stmt_info
;
10801 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10803 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10804 && !vectorizable_live_operation (vinfo
,
10805 slp_stmt_info
, gsi
, slp_node
,
10806 slp_node_instance
, i
,
10807 vec_stmt_p
, cost_vec
))
10811 else if (STMT_VINFO_LIVE_P (stmt_info
)
10812 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10813 slp_node
, slp_node_instance
, -1,
10814 vec_stmt_p
, cost_vec
))
10820 /* Make sure the statement is vectorizable. */
10823 vect_analyze_stmt (vec_info
*vinfo
,
10824 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10825 slp_tree node
, slp_instance node_instance
,
10826 stmt_vector_for_cost
*cost_vec
)
10828 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10829 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10831 gimple_seq pattern_def_seq
;
10833 if (dump_enabled_p ())
10834 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10837 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10838 return opt_result::failure_at (stmt_info
->stmt
,
10840 " stmt has volatile operands: %G\n",
10843 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10845 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10847 gimple_stmt_iterator si
;
10849 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10851 stmt_vec_info pattern_def_stmt_info
10852 = vinfo
->lookup_stmt (gsi_stmt (si
));
10853 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10854 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10856 /* Analyze def stmt of STMT if it's a pattern stmt. */
10857 if (dump_enabled_p ())
10858 dump_printf_loc (MSG_NOTE
, vect_location
,
10859 "==> examining pattern def statement: %G",
10860 pattern_def_stmt_info
->stmt
);
10863 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10864 need_to_vectorize
, node
, node_instance
,
/* Skip stmts that do not need to be vectorized.  In loops this is expected
   to include:
   - the COND_EXPR which is the loop exit condition
   - any LABEL_EXPRs in the loop
   - computations that are used only for array indexing or loop control.
   In basic blocks we only analyze statements that are a part of some SLP
   instance, therefore, all the statements are relevant.

   A pattern statement needs to be analyzed instead of the original statement
   if the original statement is not relevant.  Otherwise, we analyze both
   statements.  In basic blocks we are called from some SLP instance
   traversal; don't analyze pattern stmts instead, as the pattern stmts
   will already be part of an SLP instance.  */
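/* Illustrative example (not from the original sources): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the induction variable increment and the exit test are used only for
   loop control and addressing, so they are marked irrelevant and
   skipped here, while the load, the add and the store are analyzed.  */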
10886 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10887 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10888 && !STMT_VINFO_LIVE_P (stmt_info
))
10890 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10891 && pattern_stmt_info
10892 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10893 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10895 /* Analyze PATTERN_STMT instead of the original stmt. */
10896 stmt_info
= pattern_stmt_info
;
10897 if (dump_enabled_p ())
10898 dump_printf_loc (MSG_NOTE
, vect_location
,
10899 "==> examining pattern statement: %G",
10904 if (dump_enabled_p ())
10905 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10907 return opt_result::success ();
10910 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10912 && pattern_stmt_info
10913 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10914 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10916 /* Analyze PATTERN_STMT too. */
10917 if (dump_enabled_p ())
10918 dump_printf_loc (MSG_NOTE
, vect_location
,
10919 "==> examining pattern statement: %G",
10920 pattern_stmt_info
->stmt
);
10923 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10924 node_instance
, cost_vec
);
10929 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10931 case vect_internal_def
:
10934 case vect_reduction_def
:
10935 case vect_nested_cycle
:
10936 gcc_assert (!bb_vinfo
10937 && (relevance
== vect_used_in_outer
10938 || relevance
== vect_used_in_outer_by_reduction
10939 || relevance
== vect_used_by_reduction
10940 || relevance
== vect_unused_in_scope
10941 || relevance
== vect_used_only_live
));
10944 case vect_induction_def
:
10945 gcc_assert (!bb_vinfo
);
10948 case vect_constant_def
:
10949 case vect_external_def
:
10950 case vect_unknown_def_type
:
10952 gcc_unreachable ();
10955 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10957 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
10959 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10961 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10962 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10963 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10964 *need_to_vectorize
= true;
10967 if (PURE_SLP_STMT (stmt_info
) && !node
)
10969 if (dump_enabled_p ())
10970 dump_printf_loc (MSG_NOTE
, vect_location
,
10971 "handled only by SLP analysis\n");
10972 return opt_result::success ();
10977 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10978 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
/* Prefer vectorizable_call over vectorizable_simd_clone_call so
   -mveclibabi= takes preference over library functions with
   the simd attribute.  */
10982 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10983 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
10985 || vectorizable_conversion (vinfo
, stmt_info
,
10986 NULL
, NULL
, node
, cost_vec
)
10987 || vectorizable_operation (vinfo
, stmt_info
,
10988 NULL
, NULL
, node
, cost_vec
)
10989 || vectorizable_assignment (vinfo
, stmt_info
,
10990 NULL
, NULL
, node
, cost_vec
)
10991 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10992 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10993 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10994 node
, node_instance
, cost_vec
)
10995 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10996 NULL
, node
, cost_vec
)
10997 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10998 || vectorizable_condition (vinfo
, stmt_info
,
10999 NULL
, NULL
, node
, cost_vec
)
11000 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11002 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11003 stmt_info
, NULL
, node
));
11007 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
11008 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
11009 NULL
, NULL
, node
, cost_vec
)
11010 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
11012 || vectorizable_shift (vinfo
, stmt_info
,
11013 NULL
, NULL
, node
, cost_vec
)
11014 || vectorizable_operation (vinfo
, stmt_info
,
11015 NULL
, NULL
, node
, cost_vec
)
11016 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11018 || vectorizable_load (vinfo
, stmt_info
,
11019 NULL
, NULL
, node
, cost_vec
)
11020 || vectorizable_store (vinfo
, stmt_info
,
11021 NULL
, NULL
, node
, cost_vec
)
11022 || vectorizable_condition (vinfo
, stmt_info
,
11023 NULL
, NULL
, node
, cost_vec
)
11024 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11026 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11030 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11033 return opt_result::failure_at (stmt_info
->stmt
,
11035 " relevant stmt not supported: %G",
/* Stmts that are (also) "live" (i.e. used out of the loop)
   need extra handling, except for vectorizable reductions.  */
11041 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11042 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11043 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11044 stmt_info
, NULL
, node
, node_instance
,
11046 return opt_result::failure_at (stmt_info
->stmt
,
11048 " live stmt not supported: %G",
11051 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11060 vect_transform_stmt (vec_info
*vinfo
,
11061 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11062 slp_tree slp_node
, slp_instance slp_node_instance
)
11064 bool is_store
= false;
11065 gimple
*vec_stmt
= NULL
;
11068 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11070 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11072 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11074 switch (STMT_VINFO_TYPE (stmt_info
))
11076 case type_demotion_vec_info_type
:
11077 case type_promotion_vec_info_type
:
11078 case type_conversion_vec_info_type
:
11079 done
= vectorizable_conversion (vinfo
, stmt_info
,
11080 gsi
, &vec_stmt
, slp_node
, NULL
);
11084 case induc_vec_info_type
:
11085 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11086 stmt_info
, &vec_stmt
, slp_node
,
11091 case shift_vec_info_type
:
11092 done
= vectorizable_shift (vinfo
, stmt_info
,
11093 gsi
, &vec_stmt
, slp_node
, NULL
);
11097 case op_vec_info_type
:
11098 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11103 case assignment_vec_info_type
:
11104 done
= vectorizable_assignment (vinfo
, stmt_info
,
11105 gsi
, &vec_stmt
, slp_node
, NULL
);
11109 case load_vec_info_type
:
11110 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11115 case store_vec_info_type
:
11116 done
= vectorizable_store (vinfo
, stmt_info
,
11117 gsi
, &vec_stmt
, slp_node
, NULL
);
11119 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
/* In case of interleaving, the whole chain is vectorized when the
   last store in the chain is reached.  Store stmts before the last
   one are skipped, and their vec_stmt_info shouldn't be freed
   meanwhile.  */
11125 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11126 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11133 case condition_vec_info_type
:
11134 done
= vectorizable_condition (vinfo
, stmt_info
,
11135 gsi
, &vec_stmt
, slp_node
, NULL
);
11139 case comparison_vec_info_type
:
11140 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11145 case call_vec_info_type
:
11146 done
= vectorizable_call (vinfo
, stmt_info
,
11147 gsi
, &vec_stmt
, slp_node
, NULL
);
11150 case call_simd_clone_vec_info_type
:
11151 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11155 case reduc_vec_info_type
:
11156 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11157 gsi
, &vec_stmt
, slp_node
);
11161 case cycle_phi_info_type
:
11162 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11163 &vec_stmt
, slp_node
, slp_node_instance
);
11167 case lc_phi_info_type
:
11168 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11169 stmt_info
, &vec_stmt
, slp_node
);
11173 case phi_info_type
:
11174 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11179 if (!STMT_VINFO_LIVE_P (stmt_info
))
11181 if (dump_enabled_p ())
11182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11183 "stmt not supported.\n");
11184 gcc_unreachable ();
11189 if (!slp_node
&& vec_stmt
)
11190 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11192 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
/* Handle stmts whose DEF is used outside the loop-nest that is
   being vectorized.  */
11196 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11197 slp_node_instance
, true, NULL
);
11202 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_infos.  */
11212 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
11214 stmt_vec_info next_stmt_info
= first_stmt_info
;
11216 while (next_stmt_info
)
11218 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
11219 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
11220 /* Free the attached stmt_vec_info and remove the stmt. */
11221 vinfo
->remove_stmt (next_stmt_info
);
11222 next_stmt_info
= tmp
;
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a type.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
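/* Illustrative example (not from the original sources, assuming a
   target with 16-byte vectors): with PREVAILING_MODE == V8HImode,
   SCALAR_TYPE int and NUNITS == 4, the related vector mode lookup
   yields V4SImode, i.e. a vector of int with the same overall size as
   the prevailing mode.  */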
11242 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11243 tree scalar_type
, poly_uint64 nunits
)
11245 tree orig_scalar_type
= scalar_type
;
11246 scalar_mode inner_mode
;
11247 machine_mode simd_mode
;
11250 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11251 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11254 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* For vector types of elements whose mode precision doesn't
   match their type's precision we use an element type of mode
   precision.  The vectorization routines will have to make sure
   they support the proper result truncation/extension.
   We also make sure to build vector types with INTEGER_TYPE
   component type only.  */
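/* Illustrative example (not from the original sources): a C bool has
   QImode but TYPE_PRECISION 1, so its element type is rebuilt here as
   an 8-bit unsigned INTEGER_TYPE; an enum is likewise replaced by a
   plain integer type of the same width.  */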
11262 if (INTEGRAL_TYPE_P (scalar_type
)
11263 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11264 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11265 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11266 TYPE_UNSIGNED (scalar_type
));
/* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
   When the component mode passes the above test simply use a type
   corresponding to that mode.  The theory is that any use that
   would cause problems with this will disable vectorization anyway.  */
11272 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11273 && !INTEGRAL_TYPE_P (scalar_type
))
11274 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
/* We can't build a vector type of elements with alignment bigger than
   their size.  */
11278 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11279 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11280 TYPE_UNSIGNED (scalar_type
));
/* If we fell back to using the mode, fail if there was
   no scalar type for it.  */
11284 if (scalar_type
== NULL_TREE
)
/* If no prevailing mode was supplied, use the mode the target prefers.
   Otherwise look up a vector mode based on the prevailing mode.  */
11289 if (prevailing_mode
== VOIDmode
)
11291 gcc_assert (known_eq (nunits
, 0U));
11292 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11293 if (SCALAR_INT_MODE_P (simd_mode
))
/* Traditional behavior is not to take the integer mode
   literally, but simply to use it as a way of determining
   the vector size.  It is up to mode_for_vector to decide
   what the TYPE_MODE should be.

   Note that nunits == 1 is allowed in order to support single
   element vector types.  */
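/* Illustrative example (not from the original sources): if
   preferred_simd_mode returns the scalar DImode for a target without
   vector registers, the 8-byte size divided by a 4-byte int element
   gives NUNITS == 2 and mode_for_vector then looks for V2SImode (or an
   integer fallback) as the TYPE_MODE.  */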
11302 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11303 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11307 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11308 || !related_vector_mode (prevailing_mode
,
11309 inner_mode
, nunits
).exists (&simd_mode
))
/* Fall back to using mode_for_vector, mostly in the hope of being
   able to use an integer mode.  */
11313 if (known_eq (nunits
, 0U)
11314 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11317 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11321 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
/* In cases where the mode was chosen by mode_for_vector, check that
   the target actually supports the chosen mode, or that it at least
   allows the vector mode to be replaced by a like-sized integer mode.  */
11326 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11327 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Re-attach the address-space qualifier if we canonicalized the scalar
   type.  */
11332 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11333 return build_qualified_type
11334 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
11347 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11348 unsigned int group_size
)
/* For BB vectorization, we should always have a group size once we've
   constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
   are tentative requests during things like early data reference
   analysis and pattern recognition.  */
11354 if (is_a
<bb_vec_info
> (vinfo
))
11355 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11359 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11361 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11362 vinfo
->vector_mode
= TYPE_MODE (vectype
);
/* Register the natural choice of vector type, before the group size
   has been applied.  */
11367 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
/* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
   try again with an explicit number of elements.  */
11373 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
/* Start with the biggest number of units that fits within
   GROUP_SIZE and halve it until we find a valid vector type.
   Usually either the first attempt will succeed or all will
   fail (in the latter case because GROUP_SIZE is too small
   for the target), but it's possible that a target could have
   a hole between supported vector types.

   If GROUP_SIZE is not a power of 2, this has the effect of
   trying the largest power of 2 that fits within the group,
   even though the group is not a multiple of that vector size.
   The BB vectorizer will then try to carve up the group into
   smaller pieces.  */
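/* Illustrative example (not from the original sources): for GROUP_SIZE
   == 6 we start with NUNITS == 4 (the largest power of 2 that fits);
   if the target has no 4-element vector of SCALAR_TYPE we retry with
   2, and give up once NUNITS would drop below 2.  */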
11387 unsigned int nunits
= 1 << floor_log2 (group_size
);
11390 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11391 scalar_type
, nunits
);
11394 while (nunits
> 1 && !vectype
);
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */
11405 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
11407 unsigned int group_size
= 0;
11409 group_size
= SLP_TREE_LANES (node
);
11410 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */
11422 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11423 unsigned int group_size
)
11425 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11430 return truth_type_for (vectype
);
/* Function get_same_sized_vectype.

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */
11439 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11441 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11442 return truth_type_for (vector_type
);
11444 poly_uint64 nunits
;
11445 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11446 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11449 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11450 scalar_type
, nunits
);
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */
11457 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11459 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11460 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11461 if (!VECTOR_MODE_P (*i
)
11462 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
11488 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11489 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11491 if (def_stmt_info_out
)
11492 *def_stmt_info_out
= NULL
;
11494 *def_stmt_out
= NULL
;
11495 *dt
= vect_unknown_def_type
;
11497 if (dump_enabled_p ())
11499 dump_printf_loc (MSG_NOTE
, vect_location
,
11500 "vect_is_simple_use: operand ");
11501 if (TREE_CODE (operand
) == SSA_NAME
11502 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11503 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11505 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11508 if (CONSTANT_CLASS_P (operand
))
11509 *dt
= vect_constant_def
;
11510 else if (is_gimple_min_invariant (operand
))
11511 *dt
= vect_external_def
;
11512 else if (TREE_CODE (operand
) != SSA_NAME
)
11513 *dt
= vect_unknown_def_type
;
11514 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11515 *dt
= vect_external_def
;
11518 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11519 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11521 *dt
= vect_external_def
;
11524 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11525 def_stmt
= stmt_vinfo
->stmt
;
11526 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11527 if (def_stmt_info_out
)
11528 *def_stmt_info_out
= stmt_vinfo
;
11531 *def_stmt_out
= def_stmt
;
11534 if (dump_enabled_p ())
11536 dump_printf (MSG_NOTE
, ", type of def: ");
11539 case vect_uninitialized_def
:
11540 dump_printf (MSG_NOTE
, "uninitialized\n");
11542 case vect_constant_def
:
11543 dump_printf (MSG_NOTE
, "constant\n");
11545 case vect_external_def
:
11546 dump_printf (MSG_NOTE
, "external\n");
11548 case vect_internal_def
:
11549 dump_printf (MSG_NOTE
, "internal\n");
11551 case vect_induction_def
:
11552 dump_printf (MSG_NOTE
, "induction\n");
11554 case vect_reduction_def
:
11555 dump_printf (MSG_NOTE
, "reduction\n");
11557 case vect_double_reduction_def
:
11558 dump_printf (MSG_NOTE
, "double reduction\n");
11560 case vect_nested_cycle
:
11561 dump_printf (MSG_NOTE
, "nested cycle\n");
11563 case vect_unknown_def_type
:
11564 dump_printf (MSG_NOTE
, "unknown\n");
11569 if (*dt
== vect_unknown_def_type
)
11571 if (dump_enabled_p ())
11572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11573 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */
11590 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11591 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11592 gimple
**def_stmt_out
)
11594 stmt_vec_info def_stmt_info
;
11596 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11600 *def_stmt_out
= def_stmt
;
11601 if (def_stmt_info_out
)
11602 *def_stmt_info_out
= def_stmt_info
;
/* Now get a vector type if the def is internal, otherwise supply
   NULL_TREE and leave it up to the caller to figure out a proper
   type for the use stmt.  */
11607 if (*dt
== vect_internal_def
11608 || *dt
== vect_induction_def
11609 || *dt
== vect_reduction_def
11610 || *dt
== vect_double_reduction_def
11611 || *dt
== vect_nested_cycle
)
11613 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11614 gcc_assert (*vectype
!= NULL_TREE
);
11615 if (dump_enabled_p ())
11616 dump_printf_loc (MSG_NOTE
, vect_location
,
11617 "vect_is_simple_use: vectype %T\n", *vectype
);
11619 else if (*dt
== vect_uninitialized_def
11620 || *dt
== vect_constant_def
11621 || *dt
== vect_external_def
)
11622 *vectype
= NULL_TREE
;
11624 gcc_unreachable ();

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */
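
/* For example (illustration only): for a statement such as

     _5 = _1 < _2 ? _3 : _4;

   (a COND_EXPR whose first operand is a comparison), operand positions 0
   and 1 refer to _1 and _2 inside the comparison, while positions 2 and 3
   refer to the then/else values _3 and _4, matching the handling in the
   non-SLP path below.  */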

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
                    unsigned operand, tree *op, slp_tree *slp_def,
                    enum vect_def_type *dt,
                    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
          return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
        }
      else
        {
          if (def_stmt_info_out)
            *def_stmt_info_out = NULL;
          *op = SLP_TREE_SCALAR_OPS (child)[0];
          *dt = SLP_TREE_DEF_TYPE (child);
          return true;
        }
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
        {
          if (gimple_assign_rhs_code (ass) == COND_EXPR
              && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
            {
              if (operand < 2)
                *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
              else
                *op = gimple_op (ass, operand);
            }
          else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
            *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
          else
            *op = gimple_op (ass, operand + 1);
        }
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
        *op = gimple_call_arg (call, operand);
      else
        gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}

/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */
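
/* For example (illustration only): a vectorizable_* routine that has
   settled on a vector type for its operation calls this for each external
   or constant SLP child node; the first call records that type on the
   child, while a later call asking for an incompatible type returns false
   and the caller rejects the statement.  */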

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
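
/* For example (illustration only): widening a char load to int, as in

     signed char a[N];
     int b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i];

   with, say, 128-bit vectors needs two unpacking steps per input vector:
   chars to shorts and then shorts to ints.  On a little-endian target a
   successful call would set *CODE1/*CODE2 to
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT to 1 and push the
   vector-of-short type onto INTERM_TYPES.  */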

bool
supportable_widening_operation (vec_info *vinfo,
                                enum tree_code code, stmt_vec_info stmt_info,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow to change the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
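      /* For example (illustration only): in a reduction such as

           for (i = 0; i < n; i++)
             s += (int) a[i] * (int) b[i];

         only the final sum is live, so producing the widened products in
         even/odd order rather than lo/hi order is acceptable, and the
         recursion below may pick VEC_WIDEN_MULT_EVEN/ODD_EXPR when the
         target supports them.  */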
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt_info)
          && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt_info, vectype_out,
                                             vectype_in, code1, code2,
                                             multi_step_cvt, interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt_info->stmt);
          stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
          if (use_stmt_info
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
           && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
           && VECTOR_BOOLEAN_TYPE_P (vectype)
           && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
           && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
         is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
        return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
        {
          /* If the input and result modes are the same, a different optab
             is needed where we pass in the number of units in vectype.  */
          optab3 = vec_unpacks_sbool_lo_optab;
          optab4 = vec_unpacks_sbool_hi_optab;
        }
      else
        {
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
        }

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
                        TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
            return true;
        }

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
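
/* For example (illustration only): truncating an int load to char, as in

     int a[N];
     signed char b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i];

   with, say, 128-bit vectors packs four int vectors down to one char vector
   via an intermediate vector-of-short type, so a successful call would set
   *CODE1 to VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT to 1 and push the
   vector-of-short type onto INTERM_TYPES.  */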

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
          && VECTOR_BOOLEAN_TYPE_P (vectype)
          && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
          && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
        optab1 = vec_pack_sbool_trunc_optab;
      else
        optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
        return true;
      /* For scalar masks we may have different boolean
         vector types having the same QImode.  Thus we
         add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
                    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
        return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        intermediate_type
          = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
          && SCALAR_INT_MODE_P (prev_mode))
        interm_optab = vec_pack_sbool_trunc_optab;
      else
        interm_optab
          = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                                 optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        {
          if (!VECTOR_BOOLEAN_TYPE_P (vectype))
            return true;
          if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
                        TYPE_VECTOR_SUBPARTS (narrow_vectype)))
            return true;
        }

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */
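
/* For example (illustration only): with an 8-lane MASK_TYPE,
   START_INDEX == 3 and END_INDEX == 8, the generated IFN_WHILE_ULT call
   produces the mask {1,1,1,1,1,0,0,0}: lanes 0..4 satisfy 3 + J < 8.
   This is the kind of control mask used by fully-masked loops.  */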

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
                tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
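
/* For example (illustration only): continuing the sketch above, with an
   8-lane mask, START_INDEX == 3 and END_INDEX == 8 the result would be
   {0,0,0,0,0,1,1,1}, i.e. the complement of what vect_gen_while returns.  */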

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
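
/* For example (illustration only): for a statement widening a loaded
   char to int, such as

     _2 = (int) _1;

   where _1 is a char load, *STMT_VECTYPE_OUT is the vector-of-int type
   chosen for the statement, while *NUNITS_VECTYPE_OUT is based on the
   smallest scalar type involved (char here) and therefore has more lanes;
   the latter is what the vectorization factor computation looks at.  */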

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
                                tree *stmt_vectype_out,
                                tree *nunits_vectype_out,
                                unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
        {
          /* Ignore calls with no lhs.  These must be calls to
             #pragma omp simd functions, and what vectorization factor
             it really needs can't be determined until
             vectorizable_simd_clone_call.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "defer to SIMD clone analysis.\n");
          return opt_result::success ();
        }

      return opt_result::failure_at (stmt,
                                     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt, "not vectorized: unsupported"
                                       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
        scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
        {
          if (group_size)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type (group size %d):"
                             " %T\n", group_size, scalar_type);
          else
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for scalar type: %T\n", scalar_type);
        }
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
        return opt_result::failure_at (stmt,
                                       "not vectorized:"
                                       " unsupported data-type %T\n",
                                       scalar_type);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
                                   "not vectorized: vector stmt in loop:%G",
                                   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
         type (or the largest vector size, but we only support one
         vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
                                                   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "get vectype for smallest scalar type: %T\n",
                             scalar_type);
          nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
                                                        group_size);
          if (!nunits_vectype)
            return opt_result::failure_at
              (stmt, "not vectorized: unsupported data-type %T\n",
               scalar_type);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
                             nunits_vectype);
        }
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
                   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
                                   "Not vectorized: Incompatible number "
                                   "of vector subparts between %T and %T\n",
                                   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
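
/* For example (illustration only): with START_INDEX == 12, END_INDEX == 20
   and LEN_LIMIT == 16, the sequence computes min_of_start_and_end = 12,
   left_len = 8 and LEN = 8, i.e. only the 8 remaining elements are covered
   by the length-controlled operation.  */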

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}