/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
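
/* Illustrative note (not part of the original source): with COUNT == 4 and
   a target whose builtin_vectorization_cost for KIND is 1, the preliminary
   estimate returned above is simply 4.  The authoritative cost is computed
   later, when the saved stmt_info_for_cost entries are handed to the target
   cost model.  */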
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref. FORNOW this means that its of one of
     the following forms:
     -1- STMT = (...) ARRAY_REF (...)
     -2- STMT = ARRAY_REF (...) = (...)
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected. Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
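
/* Illustrative note (not part of the original source): in case 3a above,
   a use marked vect_used_in_outer whose definition sits in the outer loop
   is remapped to vect_used_in_scope for that definition, while reduction
   definitions are rejected by the preceding asserts.  */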
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized. For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       dr_alignment_support alignment_support_scheme,
		       int misalignment,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
			 misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
	  || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
	{
	  int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	  /* Assume that a single reg-reg move is possible and cheap,
	     do not account for vector to gp register move cost.  */
	  if (nregs > 1)
	    {
	      /* Spill.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies,
						 vector_store,
						 stmt_info, 0, vect_epilogue);
	      /* Loads.  */
	      prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
						 scalar_load,
						 stmt_info, 0, vect_epilogue);
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
		      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
		      vect_memory_access_type memory_access_type,
		      dr_alignment_support alignment_support_scheme,
		      int misalignment,
		      gather_scatter_info *gs_info,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
				    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
	{
	  gaps -= 1;
	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	}
      while (next_stmt_info);
      if (gaps)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: %d unused vectors.\n",
			     gaps);
	  vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
			      alignment_support_scheme, misalignment, false,
			      &inside_cost, &prologue_cost,
			      cost_vec, cost_vec, true);
	}
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl)
	/* For emulated gathers N offset vector element extracts
	   (we assume the scalar scaling and ptr + offset add is consumed by
	   the load).  */
	inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
					 vec_to_scalar, stmt_info, 0,
					 vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_load, stmt_info, 0,
					 vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
					 scalar_to_vec, stmt_info, 0,
					 vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
			alignment_support_scheme, misalignment, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
	  && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ???  Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
		   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
		   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
		   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, vec_oprnds0, NULL_TREE,
		     op1, vec_oprnds1, NULL_TREE,
		     op2, vec_oprnds2, NULL_TREE,
		     op3, vec_oprnds3, NULL_TREE);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   gs_info->offset_vectype,
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
1820 /* Determine whether we can use a gather load or scatter store to vectorize
1821 strided load or store STMT_INFO by truncating the current offset to a
1822 smaller width. We need to be able to construct an offset vector:
1824 { 0, X, X*2, X*3, ... }
1826 without loss of precision, where X is STMT_INFO's DR_STEP.
1828 Return true if this is possible, describing the gather load or scatter
1829 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1832 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1833 loop_vec_info loop_vinfo
, bool masked_p
,
1834 gather_scatter_info
*gs_info
)
1836 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1837 data_reference
*dr
= dr_info
->dr
;
1838 tree step
= DR_STEP (dr
);
1839 if (TREE_CODE (step
) != INTEGER_CST
)
1841 /* ??? Perhaps we could use range information here? */
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_NOTE
, vect_location
,
1844 "cannot truncate variable step.\n");
1848 /* Get the number of bits in an element. */
1849 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1850 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1851 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1853 /* Set COUNT to the upper limit on the number of elements - 1.
1854 Start with the maximum vectorization factor. */
1855 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1857 /* Try lowering COUNT to the number of scalar latch iterations. */
1858 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1859 widest_int max_iters
;
1860 if (max_loop_iterations (loop
, &max_iters
)
1861 && max_iters
< count
)
1862 count
= max_iters
.to_shwi ();
1864 /* Try scales of 1 and the element size. */
1865 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1866 wi::overflow_type overflow
= wi::OVF_NONE
;
1867 for (int i
= 0; i
< 2; ++i
)
1869 int scale
= scales
[i
];
1871 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1874 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1875 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1878 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1879 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1881 /* Find the narrowest viable offset type. */
1882 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1883 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1886 /* See whether the target supports the operation with an offset
1887 no narrower than OFFSET_TYPE. */
1888 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1889 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1890 vectype
, memory_type
, offset_type
, scale
,
1891 &gs_info
->ifn
, &gs_info
->offset_vectype
)
1892 || gs_info
->ifn
== IFN_LAST
)
1895 gs_info
->decl
= NULL_TREE
;
1896 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1897 but we don't need to store that here. */
1898 gs_info
->base
= NULL_TREE
;
1899 gs_info
->element_type
= TREE_TYPE (vectype
);
1900 gs_info
->offset
= fold_convert (offset_type
, step
);
1901 gs_info
->offset_dt
= vect_constant_def
;
1902 gs_info
->scale
= scale
;
1903 gs_info
->memory_type
= memory_type
;
1907 if (overflow
&& dump_enabled_p ())
1908 dump_printf_loc (MSG_NOTE
, vect_location
,
1909 "truncating gather/scatter offset to %d bits"
1910 " might change its value.\n", element_bits
);
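/* Standalone sketch of the precision computation performed by
   vect_truncate_gather_scatter_offset above (not GCC code; guarded out of
   the build).  The values are assumed: COUNT = 255 remaining iterations,
   DR_STEP = 4 bytes, candidate SCALE = 4, so the largest offset is
   COUNT * (STEP / SCALE) and an 8-bit offset type is wide enough.  */
#if 0
#include <stdio.h>
#include <stdint.h>

/* Bits needed to represent VALUE as an unsigned integer (a stand-in for
   the minimum-precision query used above with an unsigned sign).  */
static unsigned int
min_precision_u (uint64_t value)
{
  unsigned int bits = 0;
  do
    {
      ++bits;
      value >>= 1;
    }
  while (value);
  return bits;
}

/* Round BITS up to a power of two, like 1U << ceil_log2 (bits).  */
static unsigned int
round_up_pow2 (unsigned int bits)
{
  unsigned int r = 1;
  while (r < bits)
    r <<= 1;
  return r;
}

int
main (void)
{
  uint64_t count = 255;                  /* elements - 1 upper bound */
  uint64_t step = 4, scale = 4;          /* DR_STEP and candidate scale */
  uint64_t range = count * (step / scale);
  printf ("need a %u-bit offset type\n",
	  round_up_pow2 (min_precision_u (range)));   /* prints 8 */
  return 0;
}
#endif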
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */
1922 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1923 loop_vec_info loop_vinfo
, bool masked_p
,
1924 gather_scatter_info
*gs_info
)
1926 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1927 || gs_info
->ifn
== IFN_LAST
)
1928 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1931 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1932 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
1934 gcc_assert (TYPE_PRECISION (new_offset_type
)
1935 >= TYPE_PRECISION (old_offset_type
));
1936 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1938 if (dump_enabled_p ())
1939 dump_printf_loc (MSG_NOTE
, vect_location
,
1940 "using gather/scatter for strided/grouped access,"
1941 " scale = %d\n", gs_info
->scale
);
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */
static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */
static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
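/* Standalone sketch of the selector that perm_mask_for_reverse encodes
   above (not GCC code; guarded out of the build).  The full selector for
   an N-element vector is { N-1, N-2, ..., 0 }; the code above only pushes
   the first three indices because the encoding is a single stepped
   pattern.  N = 8 is an assumed example value.  */
#if 0
#include <stdio.h>

int
main (void)
{
  enum { N = 8 };
  unsigned sel[N];
  for (unsigned i = 0; i < N; ++i)
    sel[i] = N - 1 - i;
  for (unsigned i = 0; i < N; ++i)
    printf ("%u ", sel[i]);    /* prints: 7 6 5 4 3 2 1 0 */
  printf ("\n");
  return 0;
}
#endif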
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is a load or store that
   accesses consecutive elements with a negative step.  Sets *POFFSET
   to the offset to be applied to the DR for the first access.  */
1982 static vect_memory_access_type
1983 get_negative_load_store_type (vec_info
*vinfo
,
1984 stmt_vec_info stmt_info
, tree vectype
,
1985 vec_load_store_type vls_type
,
1986 unsigned int ncopies
, poly_int64
*poffset
)
1988 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1989 dr_alignment_support alignment_support_scheme
;
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1995 "multiple types with negative step.\n");
1996 return VMAT_ELEMENTWISE
;
1999 int misalignment
= dr_misalignment (dr_info
, vectype
);
2000 alignment_support_scheme
2001 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
2002 if (alignment_support_scheme
!= dr_aligned
2003 && alignment_support_scheme
!= dr_unaligned_supported
)
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2007 "negative step but alignment required.\n");
2008 return VMAT_ELEMENTWISE
;
2011 if (vls_type
== VLS_STORE_INVARIANT
)
2013 if (dump_enabled_p ())
2014 dump_printf_loc (MSG_NOTE
, vect_location
,
2015 "negative step with invariant source;"
2016 " no permute needed.\n");
2017 *poffset
= -TYPE_VECTOR_SUBPARTS (vectype
) + 1;
2018 return VMAT_CONTIGUOUS_DOWN
;
2021 if (!perm_mask_for_reverse (vectype
))
2023 if (dump_enabled_p ())
2024 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2025 "negative step and reversing not supported.\n");
2026 return VMAT_ELEMENTWISE
;
2029 *poffset
= -TYPE_VECTOR_SUBPARTS (vectype
) + 1;
2030 return VMAT_CONTIGUOUS_REVERSE
;
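/* Standalone model of VMAT_CONTIGUOUS_REVERSE as chosen above (not GCC
   code; guarded out of the build).  With a scalar step of -1 element and
   a vectorization factor of 4, one vector iteration loads the contiguous
   block that ends at the current element (hence the *POFFSET of
   -TYPE_VECTOR_SUBPARTS + 1) and then reverses it.  Data and sizes are
   assumed for the example.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int a[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  enum { VF = 4 };
  int i = 7;                        /* scalar loop visits a[7], a[6], ... */

  /* Contiguous load starting VF - 1 elements before the current one.  */
  int vec[VF];
  for (int k = 0; k < VF; ++k)
    vec[k] = a[i - (VF - 1) + k];   /* { 14, 15, 16, 17 } */

  /* A reversing permute restores the scalar iteration order.  */
  for (int k = 0; k < VF; ++k)
    printf ("%d ", vec[VF - 1 - k]);   /* prints: 17 16 15 14 */
  printf ("\n");
  return 0;
}
#endif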
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */
2037 vect_get_store_rhs (stmt_vec_info stmt_info
)
2039 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2041 gcc_assert (gimple_assign_single_p (assign
));
2042 return gimple_assign_rhs1 (assign
);
2044 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2046 internal_fn ifn
= gimple_call_internal_fn (call
);
2047 int index
= internal_fn_stored_value_index (ifn
);
2048 gcc_assert (index
>= 0);
2049 return gimple_call_arg (call
, index
);
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the returned vector.  It first checks
   whether the target supports construction from pieces-sized vector modes;
   if not, it checks whether a pieces-sized scalar mode can be used for the
   construction instead.  It returns NULL_TREE if no usable composition can
   be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.  */
2069 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2071 gcc_assert (VECTOR_TYPE_P (vtype
));
2072 gcc_assert (known_gt (nelts
, 0U));
2074 machine_mode vmode
= TYPE_MODE (vtype
);
2075 if (!VECTOR_MODE_P (vmode
))
2078 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2079 unsigned int pbsize
;
2080 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2082 /* First check if vec_init optab supports construction from
2083 vector pieces directly. */
2084 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2085 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2087 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2088 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2089 != CODE_FOR_nothing
))
2091 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2095 /* Otherwise check if exists an integer type of the same piece size and
2096 if vec_init optab supports construction from it directly. */
2097 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2098 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2099 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2100 != CODE_FOR_nothing
))
2102 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2103 return build_vector_type (*ptype
, nelts
);
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */
2120 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2121 tree vectype
, slp_tree slp_node
,
2122 bool masked_p
, vec_load_store_type vls_type
,
2123 vect_memory_access_type
*memory_access_type
,
2124 poly_int64
*poffset
,
2125 dr_alignment_support
*alignment_support_scheme
,
2127 gather_scatter_info
*gs_info
)
2129 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2130 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2131 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2132 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2133 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2134 bool single_element_p
= (stmt_info
== first_stmt_info
2135 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2136 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2137 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2139 /* True if the vectorized statements would access beyond the last
2140 statement in the group. */
2141 bool overrun_p
= false;
2143 /* True if we can cope with such overrun by peeling for gaps, so that
2144 there is at least one final scalar iteration after the vector loop. */
2145 bool can_overrun_p
= (!masked_p
2146 && vls_type
== VLS_LOAD
2150 /* There can only be a gap at the end of the group if the stride is
2151 known at compile time. */
2152 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2154 /* Stores can't yet have gaps. */
2155 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2159 /* For SLP vectorization we directly vectorize a subchain
2160 without permutation. */
2161 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2163 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2164 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2166 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2167 separated by the stride, until we have a complete vector.
2168 Fall back to scalar accesses if that isn't possible. */
2169 if (multiple_p (nunits
, group_size
))
2170 *memory_access_type
= VMAT_STRIDED_SLP
;
2172 *memory_access_type
= VMAT_ELEMENTWISE
;
2176 overrun_p
= loop_vinfo
&& gap
!= 0;
2177 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2179 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2180 "Grouped store with gaps requires"
2181 " non-consecutive accesses\n");
2184 /* An overrun is fine if the trailing elements are smaller
2185 than the alignment boundary B. Every vector access will
2186 be a multiple of B and so we are guaranteed to access a
2187 non-gap element in the same B-sized block. */
2189 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2191 / vect_get_scalar_dr_size (first_dr_info
)))
2194 /* If the gap splits the vector in half and the target
2195 can do half-vector operations avoid the epilogue peeling
2196 by simply loading half of the vector only. Usually
2197 the construction with an upper zero half will be elided. */
2198 dr_alignment_support alss
;
2199 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2203 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2204 vectype
, misalign
)))
2206 || alss
== dr_unaligned_supported
)
2207 && known_eq (nunits
, (group_size
- gap
) * 2)
2208 && known_eq (nunits
, group_size
)
2209 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2213 if (overrun_p
&& !can_overrun_p
)
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2217 "Peeling for outer loop is not supported\n");
2220 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2223 if (single_element_p
)
2224 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2225 only correct for single element "interleaving" SLP. */
2226 *memory_access_type
= get_negative_load_store_type
2227 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2230 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2231 separated by the stride, until we have a complete vector.
2232 Fall back to scalar accesses if that isn't possible. */
2233 if (multiple_p (nunits
, group_size
))
2234 *memory_access_type
= VMAT_STRIDED_SLP
;
2236 *memory_access_type
= VMAT_ELEMENTWISE
;
2241 gcc_assert (!loop_vinfo
|| cmp
> 0);
2242 *memory_access_type
= VMAT_CONTIGUOUS
;
2248 /* We can always handle this case using elementwise accesses,
2249 but see if something more efficient is available. */
2250 *memory_access_type
= VMAT_ELEMENTWISE
;
2252 /* If there is a gap at the end of the group then these optimizations
2253 would access excess elements in the last iteration. */
2254 bool would_overrun_p
= (gap
!= 0);
2255 /* An overrun is fine if the trailing elements are smaller than the
2256 alignment boundary B. Every vector access will be a multiple of B
2257 and so we are guaranteed to access a non-gap element in the
2258 same B-sized block. */
2261 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2262 / vect_get_scalar_dr_size (first_dr_info
)))
2263 would_overrun_p
= false;
2265 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2266 && (can_overrun_p
|| !would_overrun_p
)
2267 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2269 /* First cope with the degenerate case of a single-element
2271 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2274 /* Otherwise try using LOAD/STORE_LANES. */
2275 else if (vls_type
== VLS_LOAD
2276 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2277 : vect_store_lanes_supported (vectype
, group_size
,
2280 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2281 overrun_p
= would_overrun_p
;
2284 /* If that fails, try using permuting loads. */
2285 else if (vls_type
== VLS_LOAD
2286 ? vect_grouped_load_supported (vectype
, single_element_p
,
2288 : vect_grouped_store_supported (vectype
, group_size
))
2290 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2291 overrun_p
= would_overrun_p
;
  /* As a last resort, try using a gather load or scatter store.

     ??? Although the code can handle all group sizes correctly,
     it probably isn't a win to use separate strided accesses based
     on nearby locations.  Or, even if it's a win over scalar code,
     it might not be a win over vectorizing at a lower VF, if that
     allows us to use contiguous accesses.  */
2302 if (*memory_access_type
== VMAT_ELEMENTWISE
2305 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2307 *memory_access_type
= VMAT_GATHER_SCATTER
;
2310 if (*memory_access_type
== VMAT_GATHER_SCATTER
2311 || *memory_access_type
== VMAT_ELEMENTWISE
)
2313 *alignment_support_scheme
= dr_unaligned_supported
;
2314 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2318 *misalignment
= dr_misalignment (first_dr_info
, vectype
);
2319 *alignment_support_scheme
2320 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2324 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2326 /* STMT is the leader of the group. Check the operands of all the
2327 stmts of the group. */
2328 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2329 while (next_stmt_info
)
2331 tree op
= vect_get_store_rhs (next_stmt_info
);
2332 enum vect_def_type dt
;
2333 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2335 if (dump_enabled_p ())
2336 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2337 "use not simple.\n");
2340 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2346 gcc_assert (can_overrun_p
);
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2349 "Data access with gaps requires scalar "
2351 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
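/* Standalone model of the "gap" and "overrun" notions used by
   get_group_load_store_type above (not GCC code; guarded out of the
   build).  The scalar loop reads a[3*i] and a[3*i + 1] but never
   a[3*i + 2], so the group size is 3 with a trailing gap of 1; loading
   each group with contiguous vector accesses touches the gap element of
   the final group, which must be tolerated (peeling for gaps) or avoided.
   The sizes are assumed for the example.  */
#if 0
#include <stdio.h>

int
main (void)
{
  enum { N = 4, GROUP = 3, GAP = 1 };
  printf ("scalar accesses:");
  for (int i = 0; i < N; ++i)
    printf (" a[%d] a[%d]", GROUP * i, GROUP * i + 1);
  printf ("\ncontiguous group loads touch a[0]..a[%d];"
	  " the last %d element(s) are gap\n", GROUP * N - 1, GAP);
  return 0;
}
#endif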
/* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.  In addition
   *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
   the target does not support the alignment scheme.  *MISALIGNMENT
   is set according to the alignment of the access (including
   DR_MISALIGNMENT_UNKNOWN when it is unknown).

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */
2372 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2373 tree vectype
, slp_tree slp_node
,
2374 bool masked_p
, vec_load_store_type vls_type
,
2375 unsigned int ncopies
,
2376 vect_memory_access_type
*memory_access_type
,
2377 poly_int64
*poffset
,
2378 dr_alignment_support
*alignment_support_scheme
,
2380 gather_scatter_info
*gs_info
)
2382 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2383 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2384 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2386 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2388 *memory_access_type
= VMAT_GATHER_SCATTER
;
2389 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2391 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2392 &gs_info
->offset_dt
,
2393 &gs_info
->offset_vectype
))
2395 if (dump_enabled_p ())
2396 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2397 "%s index use not simple.\n",
2398 vls_type
== VLS_LOAD
? "gather" : "scatter");
2401 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2403 if (vls_type
!= VLS_LOAD
)
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2407 "unsupported emulated scatter.\n");
2410 else if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2411 || !TYPE_VECTOR_SUBPARTS
2412 (gs_info
->offset_vectype
).is_constant ()
2413 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2414 (gs_info
->offset_vectype
),
2415 TYPE_VECTOR_SUBPARTS (vectype
)))
2417 if (dump_enabled_p ())
2418 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2419 "unsupported vector types for emulated "
2424 /* Gather-scatter accesses perform only component accesses, alignment
2425 is irrelevant for them. */
2426 *alignment_support_scheme
= dr_unaligned_supported
;
2428 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2430 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2432 vls_type
, memory_access_type
, poffset
,
2433 alignment_support_scheme
,
2434 misalignment
, gs_info
))
2437 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2439 gcc_assert (!slp_node
);
2441 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2443 *memory_access_type
= VMAT_GATHER_SCATTER
;
2445 *memory_access_type
= VMAT_ELEMENTWISE
;
2446 /* Alignment is irrelevant here. */
2447 *alignment_support_scheme
= dr_unaligned_supported
;
2451 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2454 gcc_assert (vls_type
== VLS_LOAD
);
2455 *memory_access_type
= VMAT_INVARIANT
;
2456 /* Invariant accesses perform only component accesses, alignment
2457 is irrelevant for them. */
2458 *alignment_support_scheme
= dr_unaligned_supported
;
2463 *memory_access_type
= get_negative_load_store_type
2464 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2466 *memory_access_type
= VMAT_CONTIGUOUS
;
2467 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2469 *alignment_support_scheme
2470 = vect_supportable_dr_alignment (vinfo
,
2471 STMT_VINFO_DR_INFO (stmt_info
),
2472 vectype
, *misalignment
);
2476 if ((*memory_access_type
== VMAT_ELEMENTWISE
2477 || *memory_access_type
== VMAT_STRIDED_SLP
)
2478 && !nunits
.is_constant ())
2480 if (dump_enabled_p ())
2481 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2482 "Not using elementwise accesses due to variable "
2483 "vectorization factor.\n");
2487 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2491 "unsupported unaligned access\n");
2495 /* FIXME: At the moment the cost model seems to underestimate the
2496 cost of using elementwise accesses. This check preserves the
2497 traditional behavior until that can be fixed. */
2498 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2499 if (!first_stmt_info
)
2500 first_stmt_info
= stmt_info
;
2501 if (*memory_access_type
== VMAT_ELEMENTWISE
2502 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2503 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2504 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2505 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2509 "not falling back to elementwise accesses\n");
/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
   conditional operation STMT_INFO.  When returning true, store the mask
   in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
   vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
   to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2522 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2523 slp_tree slp_node
, unsigned mask_index
,
2524 tree
*mask
, slp_tree
*mask_node
,
2525 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2527 enum vect_def_type mask_dt
;
2529 slp_tree mask_node_1
;
2530 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2531 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2535 "mask use not simple.\n");
2539 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2541 if (dump_enabled_p ())
2542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2543 "mask argument is not a boolean.\n");
2547 /* If the caller is not prepared for adjusting an external/constant
2548 SLP mask vector type fail. */
2551 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2553 if (dump_enabled_p ())
2554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2555 "SLP mask argument is not vectorized.\n");
2559 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2561 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2563 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2565 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2567 "could not find an appropriate vector mask type.\n");
2571 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2572 TYPE_VECTOR_SUBPARTS (vectype
)))
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2576 "vector mask type %T"
2577 " does not match vector data type %T.\n",
2578 mask_vectype
, vectype
);
2583 *mask_dt_out
= mask_dt
;
2584 *mask_vectype_out
= mask_vectype
;
2586 *mask_node
= mask_node_1
;
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2596 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2597 slp_tree slp_node
, tree rhs
,
2598 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2599 vec_load_store_type
*vls_type_out
)
2601 /* In the case this is a store from a constant make sure
2602 native_encode_expr can handle it. */
2603 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2605 if (dump_enabled_p ())
2606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2607 "cannot encode constant as a byte sequence.\n");
2612 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2614 if (gimple_call_internal_p (call
)
2615 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2616 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2619 enum vect_def_type rhs_dt
;
2622 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2623 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2627 "use not simple.\n");
2631 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2632 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2634 if (dump_enabled_p ())
2635 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2636 "incompatible vector types.\n");
2640 *rhs_dt_out
= rhs_dt
;
2641 *rhs_vectype_out
= rhs_vectype
;
2642 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2643 *vls_type_out
= VLS_STORE_INVARIANT
;
2645 *vls_type_out
= VLS_STORE
;
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */
2654 vect_build_all_ones_mask (vec_info
*vinfo
,
2655 stmt_vec_info stmt_info
, tree masktype
)
2657 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2658 return build_int_cst (masktype
, -1);
2659 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2661 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2662 mask
= build_vector_from_val (masktype
, mask
);
2663 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2665 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2669 for (int j
= 0; j
< 6; ++j
)
2671 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2672 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2673 mask
= build_vector_from_val (masktype
, mask
);
2674 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
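/* Standalone illustration of an "all ones" mask element with
   floating-point type, as built by vect_build_all_ones_mask above: the
   float is only a container for the bit pattern (not GCC code; guarded
   out of the build).  */
#if 0
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  uint32_t bits = 0xffffffffu;
  float mask_elt;
  memcpy (&mask_elt, &bits, sizeof mask_elt);   /* reinterpret the bits */

  uint32_t back;
  memcpy (&back, &mask_elt, sizeof back);
  printf ("mask element bits: 0x%08x\n", (unsigned) back); /* 0xffffffff */
  return 0;
}
#endif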
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */
2683 vect_build_zero_merge_argument (vec_info
*vinfo
,
2684 stmt_vec_info stmt_info
, tree vectype
)
2687 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2688 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2689 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2693 for (int j
= 0; j
< 6; ++j
)
2695 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2696 merge
= build_real (TREE_TYPE (vectype
), r
);
2700 merge
= build_vector_from_val (vectype
, merge
);
2701 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition and MASK_DT is its definition type, otherwise
   MASK is null.  */
2711 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2712 gimple_stmt_iterator
*gsi
,
2714 gather_scatter_info
*gs_info
,
2717 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2718 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2719 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2720 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2721 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2722 edge pe
= loop_preheader_edge (loop
);
2723 enum { NARROW
, NONE
, WIDEN
} modifier
;
2724 poly_uint64 gather_off_nunits
2725 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2727 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2728 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2729 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2730 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2731 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2732 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2733 tree scaletype
= TREE_VALUE (arglist
);
2734 tree real_masktype
= masktype
;
2735 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2737 || TREE_CODE (masktype
) == INTEGER_TYPE
2738 || types_compatible_p (srctype
, masktype
)));
2739 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2740 masktype
= truth_type_for (srctype
);
2742 tree mask_halftype
= masktype
;
2743 tree perm_mask
= NULL_TREE
;
2744 tree mask_perm_mask
= NULL_TREE
;
2745 if (known_eq (nunits
, gather_off_nunits
))
2747 else if (known_eq (nunits
* 2, gather_off_nunits
))
2751 /* Currently widening gathers and scatters are only supported for
2752 fixed-length vectors. */
2753 int count
= gather_off_nunits
.to_constant ();
2754 vec_perm_builder
sel (count
, count
, 1);
2755 for (int i
= 0; i
< count
; ++i
)
2756 sel
.quick_push (i
| (count
/ 2));
2758 vec_perm_indices
indices (sel
, 1, count
);
2759 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2762 else if (known_eq (nunits
, gather_off_nunits
* 2))
2766 /* Currently narrowing gathers and scatters are only supported for
2767 fixed-length vectors. */
2768 int count
= nunits
.to_constant ();
2769 vec_perm_builder
sel (count
, count
, 1);
2770 sel
.quick_grow (count
);
2771 for (int i
= 0; i
< count
; ++i
)
2772 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2773 vec_perm_indices
indices (sel
, 2, count
);
2774 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2778 if (mask
&& masktype
== real_masktype
)
2780 for (int i
= 0; i
< count
; ++i
)
2781 sel
[i
] = i
| (count
/ 2);
2782 indices
.new_vector (sel
, 2, count
);
2783 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2786 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2791 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2792 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2794 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2795 if (!is_gimple_min_invariant (ptr
))
2798 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2799 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2800 gcc_assert (!new_bb
);
2803 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2805 tree vec_oprnd0
= NULL_TREE
;
2806 tree vec_mask
= NULL_TREE
;
2807 tree src_op
= NULL_TREE
;
2808 tree mask_op
= NULL_TREE
;
2809 tree prev_res
= NULL_TREE
;
2813 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2814 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2817 auto_vec
<tree
> vec_oprnds0
;
2818 auto_vec
<tree
> vec_masks
;
2819 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2820 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2821 gs_info
->offset
, &vec_oprnds0
);
2823 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2824 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
2825 mask
, &vec_masks
, masktype
);
2826 for (int j
= 0; j
< ncopies
; ++j
)
2829 if (modifier
== WIDEN
&& (j
& 1))
2830 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2831 perm_mask
, stmt_info
, gsi
);
2833 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
2835 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2837 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2838 TYPE_VECTOR_SUBPARTS (idxtype
)));
2839 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2840 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2841 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2842 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2848 if (mask_perm_mask
&& (j
& 1))
2849 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2850 mask_perm_mask
, stmt_info
, gsi
);
2853 if (modifier
== NARROW
)
2856 vec_mask
= vec_masks
[j
/ 2];
2859 vec_mask
= vec_masks
[j
];
2862 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2864 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2865 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2866 gcc_assert (known_eq (sub1
, sub2
));
2867 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2868 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2870 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2871 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2875 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2877 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2879 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2880 : VEC_UNPACK_LO_EXPR
,
2882 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2888 tree mask_arg
= mask_op
;
2889 if (masktype
!= real_masktype
)
2891 tree utype
, optype
= TREE_TYPE (mask_op
);
2892 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2893 utype
= real_masktype
;
2895 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2896 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2897 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2899 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2900 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2902 if (!useless_type_conversion_p (real_masktype
, utype
))
2904 gcc_assert (TYPE_PRECISION (utype
)
2905 <= TYPE_PRECISION (real_masktype
));
2906 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2907 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2908 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2911 src_op
= build_zero_cst (srctype
);
2913 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2916 if (!useless_type_conversion_p (vectype
, rettype
))
2918 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2919 TYPE_VECTOR_SUBPARTS (rettype
)));
2920 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2921 gimple_call_set_lhs (new_stmt
, op
);
2922 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2923 var
= make_ssa_name (vec_dest
);
2924 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2925 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2926 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2930 var
= make_ssa_name (vec_dest
, new_stmt
);
2931 gimple_call_set_lhs (new_stmt
, var
);
2932 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2935 if (modifier
== NARROW
)
2942 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2944 new_stmt
= SSA_NAME_DEF_STMT (var
);
2947 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2949 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */
2959 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2960 class loop
*loop
, stmt_vec_info stmt_info
,
2961 gather_scatter_info
*gs_info
,
2962 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
2964 gimple_seq stmts
= NULL
;
2965 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2969 edge pe
= loop_preheader_edge (loop
);
2970 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2971 gcc_assert (!new_bb
);
2973 unsigned ncopies
= vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2974 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2975 gs_info
->offset
, vec_offset
,
2976 gs_info
->offset_vectype
);
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */
2989 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2990 loop_vec_info loop_vinfo
,
2991 gather_scatter_info
*gs_info
,
2992 tree
*dataref_bump
, tree
*vec_offset
)
2994 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2995 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2997 tree bump
= size_binop (MULT_EXPR
,
2998 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2999 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
3000 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
3002 /* The offset given in GS_INFO can have pointer type, so use the element
3003 type of the vector instead. */
3004 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
3006 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3007 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
3008 ssize_int (gs_info
->scale
));
3009 step
= fold_convert (offset_type
, step
);
3011 /* Create {0, X, X*2, X*3, ...}. */
3012 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
3013 build_zero_cst (offset_type
), step
);
3014 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
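/* Standalone sketch of the invariant offset vector { 0, X, X*2, ... }
   with X = DR_STEP / SCALE that vect_get_strided_load_store_ops builds
   above (not GCC code; guarded out of the build).  DR_STEP = 8 bytes and
   SCALE = 4 are assumed example values.  */
#if 0
#include <stdio.h>

int
main (void)
{
  enum { NUNITS = 4 };
  long dr_step = 8, scale = 4;
  long x = dr_step / scale;      /* EXACT_DIV_EXPR: must divide evenly */
  long offset[NUNITS];
  for (int i = 0; i < NUNITS; ++i)
    offset[i] = i * x;           /* the VEC_SERIES_EXPR (0, X) series */
  for (int i = 0; i < NUNITS; ++i)
    printf ("%ld ", offset[i]);  /* prints: 0 2 4 6 */
  printf ("\n");
  return 0;
}
#endif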
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
3023 vect_get_data_ptr_increment (vec_info
*vinfo
,
3024 dr_vec_info
*dr_info
, tree aggr_type
,
3025 vect_memory_access_type memory_access_type
)
3027 if (memory_access_type
== VMAT_INVARIANT
)
3028 return size_zero_node
;
3030 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3031 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
3032 if (tree_int_cst_sgn (step
) == -1)
3033 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
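/* Standalone sketch of the pointer-bump computation above: the increment
   is the size of AGGR_TYPE, negated when the scalar step is negative
   (not GCC code; guarded out of the build; sizes are assumed).  */
#if 0
#include <stdio.h>

int
main (void)
{
  long vector_bytes = 16;        /* TYPE_SIZE_UNIT (aggr_type) */
  long scalar_step = -4;         /* DR step in bytes */
  long iv_step = scalar_step < 0 ? -vector_bytes : vector_bytes;
  printf ("pointer bump per copy: %ld bytes\n", iv_step);  /* -16 */
  return 0;
}
#endif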
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
3040 vectorizable_bswap (vec_info
*vinfo
,
3041 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3042 gimple
**vec_stmt
, slp_tree slp_node
,
3044 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
3047 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
3048 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3051 op
= gimple_call_arg (stmt
, 0);
3052 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3053 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3061 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3063 gcc_assert (ncopies
>= 1);
3065 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3069 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3070 unsigned word_bytes
;
3071 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3074 /* The encoding uses one stepped pattern for each byte in the word. */
3075 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3076 for (unsigned i
= 0; i
< 3; ++i
)
3077 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3078 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3080 vec_perm_indices
indices (elts
, 1, num_bytes
);
3081 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3087 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3089 if (dump_enabled_p ())
3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3091 "incompatible vector types for invariants\n");
3095 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3096 DUMP_VECT_SCOPE ("vectorizable_bswap");
3097 record_stmt_cost (cost_vec
,
3098 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3099 record_stmt_cost (cost_vec
,
3101 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3102 vec_perm
, stmt_info
, 0, vect_body
);
3106 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3109 vec
<tree
> vec_oprnds
= vNULL
;
3110 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
3112 /* Arguments are ready. create the new vector stmt. */
3115 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3118 tree tem
= make_ssa_name (char_vectype
);
3119 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3120 char_vectype
, vop
));
3121 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3122 tree tem2
= make_ssa_name (char_vectype
);
3123 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3124 tem
, tem
, bswap_vconst
);
3125 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3126 tem
= make_ssa_name (vectype
);
3127 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3129 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3131 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3133 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3137 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3139 vec_oprnds
.release ();
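/* Standalone sketch of the byte-reversing selector that
   vectorizable_bswap builds above for BUILT_IN_BSWAP32 on a 16-byte
   vector: each 4-byte word has its bytes reversed.  The code above only
   pushes the first three words because the selector uses a stepped
   encoding.  (Not GCC code; guarded out of the build.)  */
#if 0
#include <stdio.h>

int
main (void)
{
  enum { NUM_BYTES = 16, WORD_BYTES = 4 };
  unsigned sel[NUM_BYTES];
  for (unsigned i = 0; i < NUM_BYTES / WORD_BYTES; ++i)
    for (unsigned j = 0; j < WORD_BYTES; ++j)
      sel[i * WORD_BYTES + j] = (i + 1) * WORD_BYTES - j - 1;
  for (unsigned i = 0; i < NUM_BYTES; ++i)
    printf ("%u ", sel[i]);
  printf ("\n");   /* 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12 */
  return 0;
}
#endif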
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */
3149 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3150 tree_code
*convert_code
)
3152 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3153 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3157 int multi_step_cvt
= 0;
3158 auto_vec
<tree
, 8> interm_types
;
3159 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3160 &code
, &multi_step_cvt
, &interm_types
)
3164 *convert_code
= code
;
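/* Standalone sketch of the single-step integer narrowing that
   simple_integer_narrowing checks for: two vectors of 64-bit elements are
   truncated and packed into one vector of 32-bit elements.  The lane
   order shown is illustrative only; the real operation and its operand
   order come from supportable_narrowing_operation.  (Not GCC code;
   guarded out of the build.)  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int64_t lo_half[2] = { 3, 4 };
  int64_t hi_half[2] = { 0x100000001LL, 2 };
  uint32_t packed[4];
  for (int i = 0; i < 2; ++i)
    {
      packed[i] = (uint32_t) lo_half[i];       /* truncate first input */
      packed[i + 2] = (uint32_t) hi_half[i];   /* truncate second input */
    }
  for (int i = 0; i < 4; ++i)
    printf ("%u ", (unsigned) packed[i]);      /* prints: 3 4 1 2 */
  printf ("\n");
  return 0;
}
#endif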
/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3176 vectorizable_call (vec_info
*vinfo
,
3177 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3178 gimple
**vec_stmt
, slp_tree slp_node
,
3179 stmt_vector_for_cost
*cost_vec
)
3185 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3186 tree vectype_out
, vectype_in
;
3187 poly_uint64 nunits_in
;
3188 poly_uint64 nunits_out
;
3189 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3190 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3191 tree fndecl
, new_temp
, rhs_type
;
3192 enum vect_def_type dt
[4]
3193 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3194 vect_unknown_def_type
};
3195 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3196 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3197 int ndts
= ARRAY_SIZE (dt
);
3199 auto_vec
<tree
, 8> vargs
;
3200 auto_vec
<tree
, 8> orig_vargs
;
3201 enum { NARROW
, NONE
, WIDEN
} modifier
;
3205 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3208 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3212 /* Is STMT_INFO a vectorizable call? */
3213 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3217 if (gimple_call_internal_p (stmt
)
3218 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3219 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3220 /* Handled by vectorizable_load and vectorizable_store. */
3223 if (gimple_call_lhs (stmt
) == NULL_TREE
3224 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3227 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3229 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3231 /* Process function arguments. */
3232 rhs_type
= NULL_TREE
;
3233 vectype_in
= NULL_TREE
;
3234 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is also not supported.  */
3239 if (nargs
== 0 || nargs
> 4)
3242 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3243 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3244 if (cfn
== CFN_GOMP_SIMD_LANE
)
3247 rhs_type
= unsigned_type_node
;
3251 if (internal_fn_p (cfn
))
3252 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3254 for (i
= 0; i
< nargs
; i
++)
3256 if ((int) i
== mask_opno
)
3258 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3259 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3264 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3265 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3269 "use not simple.\n");
3273 /* We can only handle calls with arguments of the same type. */
3275 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3279 "argument types differ.\n");
3283 rhs_type
= TREE_TYPE (op
);
3286 vectype_in
= vectypes
[i
];
3287 else if (vectypes
[i
]
3288 && !types_compatible_p (vectypes
[i
], vectype_in
))
3290 if (dump_enabled_p ())
3291 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3292 "argument vector types differ.\n");
3296 /* If all arguments are external or constant defs, infer the vector type
3297 from the scalar type. */
3299 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3301 gcc_assert (vectype_in
);
3304 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3306 "no vectype for scalar type %T\n", rhs_type
);
  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
     by a pack of the two vectors into an SI vector.  We would need
     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3315 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3317 if (dump_enabled_p ())
3318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3319 "mismatched vector sizes %T and %T\n",
3320 vectype_in
, vectype_out
);
3324 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3325 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3329 "mixed mask and nonmask vector types\n");
3334 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3335 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3336 if (known_eq (nunits_in
* 2, nunits_out
))
3338 else if (known_eq (nunits_out
, nunits_in
))
3340 else if (known_eq (nunits_out
* 2, nunits_in
))
3345 /* We only handle functions that do not read or clobber memory. */
3346 if (gimple_vuse (stmt
))
3348 if (dump_enabled_p ())
3349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3350 "function reads from or writes to memory.\n");
3354 /* For now, we only vectorize functions if a target specific builtin
3355 is available. TODO -- in some cases, it might be profitable to
3356 insert the calls for pieces of the vector, in order to be able
3357 to vectorize other operations in the loop. */
3359 internal_fn ifn
= IFN_LAST
;
3360 tree callee
= gimple_call_fndecl (stmt
);
3362 /* First try using an internal function. */
3363 tree_code convert_code
= ERROR_MARK
;
3365 && (modifier
== NONE
3366 || (modifier
== NARROW
3367 && simple_integer_narrowing (vectype_out
, vectype_in
,
3369 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3372 /* If that fails, try asking for a target-specific built-in function. */
3373 if (ifn
== IFN_LAST
)
3375 if (cfn
!= CFN_LAST
)
3376 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3377 (cfn
, vectype_out
, vectype_in
);
3378 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3379 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3380 (callee
, vectype_out
, vectype_in
);
3383 if (ifn
== IFN_LAST
&& !fndecl
)
3385 if (cfn
== CFN_GOMP_SIMD_LANE
3388 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3389 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3390 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3391 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3393 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3394 { 0, 1, 2, ... vf - 1 } vector. */
3395 gcc_assert (nargs
== 0);
3397 else if (modifier
== NONE
3398 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3399 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3400 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3401 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3402 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3403 slp_op
, vectype_in
, cost_vec
);
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3408 "function is not vectorizable.\n");
3415 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3416 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3418 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3420 /* Sanity check: make sure that at least one copy of the vectorized stmt
3421 needs to be generated. */
3422 gcc_assert (ncopies
>= 1);
3424 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3425 if (!vec_stmt
) /* transformation not required. */
3428 for (i
= 0; i
< nargs
; ++i
)
3429 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3431 if (dump_enabled_p ())
3432 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3433 "incompatible vector types for invariants\n");
3436 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3437 DUMP_VECT_SCOPE ("vectorizable_call");
3438 vect_model_simple_cost (vinfo
, stmt_info
,
3439 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3440 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3441 record_stmt_cost (cost_vec
, ncopies
/ 2,
3442 vec_promote_demote
, stmt_info
, 0, vect_body
);
3444 if (loop_vinfo
&& mask_opno
>= 0)
3446 unsigned int nvectors
= (slp_node
3447 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3449 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3450 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3451 vectype_out
, scalar_mask
);
3458 if (dump_enabled_p ())
3459 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3462 scalar_dest
= gimple_call_lhs (stmt
);
3463 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3465 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3467 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3469 tree prev_res
= NULL_TREE
;
3470 vargs
.safe_grow (nargs
, true);
3471 orig_vargs
.safe_grow (nargs
, true);
3472 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3473 for (j
= 0; j
< ncopies
; ++j
)
3475 /* Build argument list for the vectorized call. */
3478 vec
<tree
> vec_oprnds0
;
3480 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3481 vec_oprnds0
= vec_defs
[0];
3483 /* Arguments are ready. Create the new vector stmt. */
3484 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3487 for (k
= 0; k
< nargs
; k
++)
3489 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3490 vargs
[k
] = vec_oprndsk
[i
];
3493 if (modifier
== NARROW
)
3495 /* We don't define any narrowing conditional functions
3497 gcc_assert (mask_opno
< 0);
3498 tree half_res
= make_ssa_name (vectype_in
);
3500 = gimple_build_call_internal_vec (ifn
, vargs
);
3501 gimple_call_set_lhs (call
, half_res
);
3502 gimple_call_set_nothrow (call
, true);
3503 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3506 prev_res
= half_res
;
3509 new_temp
= make_ssa_name (vec_dest
);
3510 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3511 prev_res
, half_res
);
3512 vect_finish_stmt_generation (vinfo
, stmt_info
,
3517 if (mask_opno
>= 0 && masked_loop_p
)
3519 unsigned int vec_num
= vec_oprnds0
.length ();
3520 /* Always true for SLP. */
3521 gcc_assert (ncopies
== 1);
3522 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3524 vargs
[mask_opno
] = prepare_load_store_mask
3525 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3529 if (ifn
!= IFN_LAST
)
3530 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3532 call
= gimple_build_call_vec (fndecl
, vargs
);
3533 new_temp
= make_ssa_name (vec_dest
, call
);
3534 gimple_call_set_lhs (call
, new_temp
);
3535 gimple_call_set_nothrow (call
, true);
3536 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3539 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3544 for (i
= 0; i
< nargs
; i
++)
3546 op
= gimple_call_arg (stmt
, i
);
3549 vec_defs
.quick_push (vNULL
);
3550 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3554 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3557 if (mask_opno
>= 0 && masked_loop_p
)
3559 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3562 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3563 vargs
[mask_opno
], gsi
);
3567 if (cfn
== CFN_GOMP_SIMD_LANE
)
3569 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3571 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3572 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3573 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3574 new_temp
= make_ssa_name (vec_dest
);
3575 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3576 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3578 else if (modifier
== NARROW
)
3580 /* We don't define any narrowing conditional functions at
3582 gcc_assert (mask_opno
< 0);
3583 tree half_res
= make_ssa_name (vectype_in
);
3584 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3585 gimple_call_set_lhs (call
, half_res
);
3586 gimple_call_set_nothrow (call
, true);
3587 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3590 prev_res
= half_res
;
3593 new_temp
= make_ssa_name (vec_dest
);
3594 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3595 prev_res
, half_res
);
3596 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3601 if (ifn
!= IFN_LAST
)
3602 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3604 call
= gimple_build_call_vec (fndecl
, vargs
);
3605 new_temp
= make_ssa_name (vec_dest
, call
);
3606 gimple_call_set_lhs (call
, new_temp
);
3607 gimple_call_set_nothrow (call
, true);
3608 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3612 if (j
== (modifier
== NARROW
? 1 : 0))
3613 *vec_stmt
= new_stmt
;
3614 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3616 for (i
= 0; i
< nargs
; i
++)
3618 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3619 vec_oprndsi
.release ();
3622 else if (modifier
== NARROW
)
3624 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3625 /* We don't define any narrowing conditional functions at present. */
3626 gcc_assert (mask_opno
< 0);
3627 for (j
= 0; j
< ncopies
; ++j
)
3629 /* Build argument list for the vectorized call. */
3631 vargs
.create (nargs
* 2);
3637 vec
<tree
> vec_oprnds0
;
3639 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3640 vec_oprnds0
= vec_defs
[0];
3642 /* Arguments are ready. Create the new vector stmt. */
3643 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3647 for (k
= 0; k
< nargs
; k
++)
3649 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3650 vargs
.quick_push (vec_oprndsk
[i
]);
3651 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3654 if (ifn
!= IFN_LAST
)
3655 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3657 call
= gimple_build_call_vec (fndecl
, vargs
);
3658 new_temp
= make_ssa_name (vec_dest
, call
);
3659 gimple_call_set_lhs (call
, new_temp
);
3660 gimple_call_set_nothrow (call
, true);
3661 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3662 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3667 for (i
= 0; i
< nargs
; i
++)
3669 op
= gimple_call_arg (stmt
, i
);
3672 vec_defs
.quick_push (vNULL
);
3673 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3674 op
, &vec_defs
[i
], vectypes
[i
]);
3676 vec_oprnd0
= vec_defs
[i
][2*j
];
3677 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3679 vargs
.quick_push (vec_oprnd0
);
3680 vargs
.quick_push (vec_oprnd1
);
3683 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3684 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3685 gimple_call_set_lhs (new_stmt
, new_temp
);
3686 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3688 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3692 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3694 for (i
= 0; i
< nargs
; i
++)
3696 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3697 vec_oprndsi
.release ();
3701 /* No current target implements this case. */
3706 /* The call in STMT might prevent it from being removed in dce.
3707 We however cannot remove it here, due to the way the ssa name
3708 it defines is mapped to the new definition. So just replace
3709 rhs of the statement with something harmless. */
3714 stmt_info
= vect_orig_stmt (stmt_info
);
3715 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3718 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3719 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
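/* Illustrative sketch, not part of GCC: the "harmless rhs" trick described in
   the comment above.  LHS is assumed to be the scalar SSA lhs of the call
   being vectorized; the returned assignment keeps the SSA definition valid
   until DCE removes it.  Only APIs already used in this file appear here.  */
#if 0
static gimple *
sketch_harmless_replacement (tree lhs)
{
  /* "lhs = call (...)" becomes "lhs = 0" of the same scalar type.  */
  return gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
}
#endif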
/* Per-argument information collected while analyzing a call that may be
   vectorized via a SIMD clone (see vectorizable_simd_clone_call).  */

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, class loop *loop,
		       struct simd_call_arg_info *arginfo)
3743 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3745 if (!is_gimple_assign (def_stmt
)
3746 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3747 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3750 tree base
= gimple_assign_rhs1 (def_stmt
);
3751 HOST_WIDE_INT linear_step
= 0;
3752 tree v
= gimple_assign_rhs2 (def_stmt
);
3753 while (TREE_CODE (v
) == SSA_NAME
)
3756 def_stmt
= SSA_NAME_DEF_STMT (v
);
3757 if (is_gimple_assign (def_stmt
))
3758 switch (gimple_assign_rhs_code (def_stmt
))
3761 t
= gimple_assign_rhs2 (def_stmt
);
3762 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3764 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3765 v
= gimple_assign_rhs1 (def_stmt
);
3768 t
= gimple_assign_rhs2 (def_stmt
);
3769 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3771 linear_step
= tree_to_shwi (t
);
3772 v
= gimple_assign_rhs1 (def_stmt
);
3775 t
= gimple_assign_rhs1 (def_stmt
);
3776 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3777 || (TYPE_PRECISION (TREE_TYPE (v
))
3778 < TYPE_PRECISION (TREE_TYPE (t
))))
3787 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3789 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3790 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3795 arginfo
->linear_step
= linear_step
;
3797 arginfo
->simd_lane_linear
= true;
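/* Illustrative sketch, not part of GCC: the address shape the walk above is
   trying to recognise, written as plain C.  "lane" stands in for the result
   of the IFN_GOMP_SIMD_LANE call and "base"/"step" for the values recorded
   in ARGINFO; all names here are schematic.  */
#if 0
static char *
sketch_simd_lane_linear_address (char *base, long lane, long step)
{
  /* Within one SIMD chunk the argument advances by STEP per lane, which is
     what arginfo->linear_step and arginfo->simd_lane_linear describe.  */
  return base + lane * step;
}
#endif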
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      gimple **vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
3830 tree vec_oprnd0
= NULL_TREE
;
3833 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3834 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3835 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3836 tree fndecl
, new_temp
;
3838 auto_vec
<simd_call_arg_info
> arginfo
;
3839 vec
<tree
> vargs
= vNULL
;
3841 tree lhs
, rtype
, ratype
;
3842 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3844 /* Is STMT a vectorizable call? */
3845 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3849 fndecl
= gimple_call_fndecl (stmt
);
3850 if (fndecl
== NULL_TREE
)
3853 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3854 if (node
== NULL
|| node
->simd_clones
== NULL
)
3857 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3860 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3864 if (gimple_call_lhs (stmt
)
3865 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3868 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3870 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3872 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3879 /* Process function arguments. */
3880 nargs
= gimple_call_num_args (stmt
);
3882 /* Bail out if the function has zero arguments. */
3886 arginfo
.reserve (nargs
, true);
3888 for (i
= 0; i
< nargs
; i
++)
3890 simd_call_arg_info thisarginfo
;
3893 thisarginfo
.linear_step
= 0;
3894 thisarginfo
.align
= 0;
3895 thisarginfo
.op
= NULL_TREE
;
3896 thisarginfo
.simd_lane_linear
= false;
3898 op
= gimple_call_arg (stmt
, i
);
3899 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3900 &thisarginfo
.vectype
)
3901 || thisarginfo
.dt
== vect_uninitialized_def
)
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3905 "use not simple.\n");
3909 if (thisarginfo
.dt
== vect_constant_def
3910 || thisarginfo
.dt
== vect_external_def
)
3911 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3914 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3915 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3917 if (dump_enabled_p ())
3918 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3919 "vector mask arguments are not supported\n");
3924 /* For linear arguments, the analyze phase should have saved
3925 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3926 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3927 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3929 gcc_assert (vec_stmt
);
3930 thisarginfo
.linear_step
3931 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3933 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3934 thisarginfo
.simd_lane_linear
3935 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3936 == boolean_true_node
);
3937 /* If loop has been peeled for alignment, we need to adjust it. */
3938 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3939 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3940 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3942 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3943 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3944 tree opt
= TREE_TYPE (thisarginfo
.op
);
3945 bias
= fold_convert (TREE_TYPE (step
), bias
);
3946 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3948 = fold_build2 (POINTER_TYPE_P (opt
)
3949 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3950 thisarginfo
.op
, bias
);
3954 && thisarginfo
.dt
!= vect_constant_def
3955 && thisarginfo
.dt
!= vect_external_def
3957 && TREE_CODE (op
) == SSA_NAME
3958 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3960 && tree_fits_shwi_p (iv
.step
))
3962 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3963 thisarginfo
.op
= iv
.base
;
3965 else if ((thisarginfo
.dt
== vect_constant_def
3966 || thisarginfo
.dt
== vect_external_def
)
3967 && POINTER_TYPE_P (TREE_TYPE (op
)))
3968 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3969 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3971 if (POINTER_TYPE_P (TREE_TYPE (op
))
3972 && !thisarginfo
.linear_step
3974 && thisarginfo
.dt
!= vect_constant_def
3975 && thisarginfo
.dt
!= vect_external_def
3978 && TREE_CODE (op
) == SSA_NAME
)
3979 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3981 arginfo
.quick_push (thisarginfo
);
3984 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3985 if (!vf
.is_constant ())
3987 if (dump_enabled_p ())
3988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3989 "not considering SIMD clones; not yet supported"
3990 " for variable-width vectors.\n");
3994 unsigned int badness
= 0;
3995 struct cgraph_node
*bestn
= NULL
;
3996 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3997 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3999 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4000 n
= n
->simdclone
->next_clone
)
4002 unsigned int this_badness
= 0;
4003 unsigned int num_calls
;
4004 if (!constant_multiple_p (vf
, n
->simdclone
->simdlen
, &num_calls
)
4005 || n
->simdclone
->nargs
!= nargs
)
4008 this_badness
+= exact_log2 (num_calls
) * 4096;
4009 if (n
->simdclone
->inbranch
)
4010 this_badness
+= 8192;
4011 int target_badness
= targetm
.simd_clone
.usable (n
);
4012 if (target_badness
< 0)
4014 this_badness
+= target_badness
* 512;
4015 /* FORNOW: Have to add code to add the mask argument. */
4016 if (n
->simdclone
->inbranch
)
4018 for (i
= 0; i
< nargs
; i
++)
4020 switch (n
->simdclone
->args
[i
].arg_type
)
4022 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4023 if (!useless_type_conversion_p
4024 (n
->simdclone
->args
[i
].orig_type
,
4025 TREE_TYPE (gimple_call_arg (stmt
, i
))))
4027 else if (arginfo
[i
].dt
== vect_constant_def
4028 || arginfo
[i
].dt
== vect_external_def
4029 || arginfo
[i
].linear_step
)
4032 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4033 if (arginfo
[i
].dt
!= vect_constant_def
4034 && arginfo
[i
].dt
!= vect_external_def
)
4037 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4038 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4039 if (arginfo
[i
].dt
== vect_constant_def
4040 || arginfo
[i
].dt
== vect_external_def
4041 || (arginfo
[i
].linear_step
4042 != n
->simdclone
->args
[i
].linear_step
))
4045 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4046 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4047 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4048 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4049 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4054 case SIMD_CLONE_ARG_TYPE_MASK
:
4057 if (i
== (size_t) -1)
4059 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4064 if (arginfo
[i
].align
)
4065 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4066 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4068 if (i
== (size_t) -1)
4070 if (bestn
== NULL
|| this_badness
< badness
)
4073 badness
= this_badness
;
4080 for (i
= 0; i
< nargs
; i
++)
4081 if ((arginfo
[i
].dt
== vect_constant_def
4082 || arginfo
[i
].dt
== vect_external_def
)
4083 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4085 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
4086 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4088 if (arginfo
[i
].vectype
== NULL
4089 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4090 simd_clone_subparts (arginfo
[i
].vectype
)))
4094 fndecl
= bestn
->decl
;
4095 nunits
= bestn
->simdclone
->simdlen
;
4096 ncopies
= vector_unroll_factor (vf
, nunits
);
4098 /* If the function isn't const, only allow it in simd loops where user
4099 has asserted that at least nunits consecutive iterations can be
4100 performed using SIMD instructions. */
4101 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4102 && gimple_vuse (stmt
))
4105 /* Sanity check: make sure that at least one copy of the vectorized stmt
4106 needs to be generated. */
4107 gcc_assert (ncopies
>= 1);
4109 if (!vec_stmt
) /* transformation not required. */
4111 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
4112 for (i
= 0; i
< nargs
; i
++)
4113 if ((bestn
->simdclone
->args
[i
].arg_type
4114 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
4115 || (bestn
->simdclone
->args
[i
].arg_type
4116 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
4118 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
4121 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
4122 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4123 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4124 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4125 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4126 tree sll
= arginfo
[i
].simd_lane_linear
4127 ? boolean_true_node
: boolean_false_node
;
4128 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4130 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4131 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4132 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4133 dt, slp_node, cost_vec); */
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4143 scalar_dest
= gimple_call_lhs (stmt
);
4144 vec_dest
= NULL_TREE
;
4149 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4150 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4151 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4154 rtype
= TREE_TYPE (ratype
);
4158 auto_vec
<vec
<tree
> > vec_oprnds
;
4159 auto_vec
<unsigned> vec_oprnds_i
;
4160 vec_oprnds
.safe_grow_cleared (nargs
, true);
4161 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4162 for (j
= 0; j
< ncopies
; ++j
)
4164 /* Build argument list for the vectorized call. */
4166 vargs
.create (nargs
);
4170 for (i
= 0; i
< nargs
; i
++)
4172 unsigned int k
, l
, m
, o
;
4174 op
= gimple_call_arg (stmt
, i
);
4175 switch (bestn
->simdclone
->args
[i
].arg_type
)
4177 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4178 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4179 o
= vector_unroll_factor (nunits
,
4180 simd_clone_subparts (atype
));
4181 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4183 if (simd_clone_subparts (atype
)
4184 < simd_clone_subparts (arginfo
[i
].vectype
))
4186 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4187 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4188 / simd_clone_subparts (atype
));
4189 gcc_assert ((k
& (k
- 1)) == 0);
4192 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4193 ncopies
* o
/ k
, op
,
4195 vec_oprnds_i
[i
] = 0;
4196 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4200 vec_oprnd0
= arginfo
[i
].op
;
4201 if ((m
& (k
- 1)) == 0)
4202 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4204 arginfo
[i
].op
= vec_oprnd0
;
4206 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4208 bitsize_int ((m
& (k
- 1)) * prec
));
4210 = gimple_build_assign (make_ssa_name (atype
),
4212 vect_finish_stmt_generation (vinfo
, stmt_info
,
4214 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4218 k
= (simd_clone_subparts (atype
)
4219 / simd_clone_subparts (arginfo
[i
].vectype
));
4220 gcc_assert ((k
& (k
- 1)) == 0);
4221 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4223 vec_alloc (ctor_elts
, k
);
4226 for (l
= 0; l
< k
; l
++)
4228 if (m
== 0 && l
== 0)
4230 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4234 vec_oprnds_i
[i
] = 0;
4235 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4238 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4239 arginfo
[i
].op
= vec_oprnd0
;
4242 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4246 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4250 = build1 (VIEW_CONVERT_EXPR
, atype
, vec_oprnd0
);
4252 = gimple_build_assign (make_ssa_name (atype
),
4254 vect_finish_stmt_generation (vinfo
, stmt_info
,
4256 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4259 vargs
.safe_push (vec_oprnd0
);
4262 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4264 = gimple_build_assign (make_ssa_name (atype
),
4266 vect_finish_stmt_generation (vinfo
, stmt_info
,
4268 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4273 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4274 vargs
.safe_push (op
);
4276 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4277 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4282 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4283 &stmts
, true, NULL_TREE
);
4287 edge pe
= loop_preheader_edge (loop
);
4288 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4289 gcc_assert (!new_bb
);
4291 if (arginfo
[i
].simd_lane_linear
)
4293 vargs
.safe_push (arginfo
[i
].op
);
4296 tree phi_res
= copy_ssa_name (op
);
4297 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4298 add_phi_arg (new_phi
, arginfo
[i
].op
,
4299 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4301 = POINTER_TYPE_P (TREE_TYPE (op
))
4302 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4303 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4304 ? sizetype
: TREE_TYPE (op
);
4306 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4308 tree tcst
= wide_int_to_tree (type
, cst
);
4309 tree phi_arg
= copy_ssa_name (op
);
4311 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4312 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4313 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4314 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4316 arginfo
[i
].op
= phi_res
;
4317 vargs
.safe_push (phi_res
);
4322 = POINTER_TYPE_P (TREE_TYPE (op
))
4323 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4324 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4325 ? sizetype
: TREE_TYPE (op
);
4327 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4329 tree tcst
= wide_int_to_tree (type
, cst
);
4330 new_temp
= make_ssa_name (TREE_TYPE (op
));
4332 = gimple_build_assign (new_temp
, code
,
4333 arginfo
[i
].op
, tcst
);
4334 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4335 vargs
.safe_push (new_temp
);
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4341 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4342 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4349 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4353 || known_eq (simd_clone_subparts (rtype
), nunits
));
4355 new_temp
= create_tmp_var (ratype
);
4356 else if (useless_type_conversion_p (vectype
, rtype
))
4357 new_temp
= make_ssa_name (vec_dest
, new_call
);
4359 new_temp
= make_ssa_name (rtype
, new_call
);
4360 gimple_call_set_lhs (new_call
, new_temp
);
4362 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4363 gimple
*new_stmt
= new_call
;
4367 if (!multiple_p (simd_clone_subparts (vectype
), nunits
))
4370 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4371 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4372 k
= vector_unroll_factor (nunits
,
4373 simd_clone_subparts (vectype
));
4374 gcc_assert ((k
& (k
- 1)) == 0);
4375 for (l
= 0; l
< k
; l
++)
4380 t
= build_fold_addr_expr (new_temp
);
4381 t
= build2 (MEM_REF
, vectype
, t
,
4382 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4385 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4386 bitsize_int (prec
), bitsize_int (l
* prec
));
4387 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4388 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4390 if (j
== 0 && l
== 0)
4391 *vec_stmt
= new_stmt
;
4392 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4396 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4399 else if (!multiple_p (nunits
, simd_clone_subparts (vectype
)))
4401 unsigned int k
= (simd_clone_subparts (vectype
)
4402 / simd_clone_subparts (rtype
));
4403 gcc_assert ((k
& (k
- 1)) == 0);
4404 if ((j
& (k
- 1)) == 0)
4405 vec_alloc (ret_ctor_elts
, k
);
4409 o
= vector_unroll_factor (nunits
,
4410 simd_clone_subparts (rtype
));
4411 for (m
= 0; m
< o
; m
++)
4413 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4414 size_int (m
), NULL_TREE
, NULL_TREE
);
4415 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4417 vect_finish_stmt_generation (vinfo
, stmt_info
,
4419 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4420 gimple_assign_lhs (new_stmt
));
4422 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4425 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4426 if ((j
& (k
- 1)) != k
- 1)
4428 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4430 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4431 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4433 if ((unsigned) j
== k
- 1)
4434 *vec_stmt
= new_stmt
;
4435 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4440 tree t
= build_fold_addr_expr (new_temp
);
4441 t
= build2 (MEM_REF
, vectype
, t
,
4442 build_int_cst (TREE_TYPE (t
), 0));
4443 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4444 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4445 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4447 else if (!useless_type_conversion_p (vectype
, rtype
))
4449 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4451 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4452 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4457 *vec_stmt
= new_stmt
;
4458 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4461 for (i
= 0; i
< nargs
; ++i
)
4463 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4468 /* The call in STMT might prevent it from being removed in dce.
4469 We however cannot remove it here, due to the way the ssa name
4470 it defines is mapped to the new definition. So just replace
4471 rhs of the statement with something harmless. */
4479 type
= TREE_TYPE (scalar_dest
);
4480 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4481 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4484 new_stmt
= gimple_build_nop ();
4485 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4486 unlink_stmt_vdef (stmt
);
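/* Illustrative sketch, not part of GCC: the shape of the clone-selection
   scoring used above, reduced to plain integers.  The weights (4096 per
   doubling of the number of calls, 8192 for an inbranch clone, 512 per unit
   of target badness) mirror the constants used above; num_calls is the
   number of clone invocations needed to cover the vectorization factor, and
   target_badness is assumed non-negative (a negative value means the clone
   is unusable and is skipped entirely).  The per-argument compatibility
   checks are omitted.  */
#if 0
static unsigned
sketch_clone_badness (unsigned num_calls, bool inbranch, int target_badness)
{
  unsigned badness = 0;
  /* Prefer clones that cover the vectorization factor in fewer calls.  */
  badness += exact_log2 (num_calls) * 4096;
  /* Prefer non-masked clones when the call site is unconditional.  */
  if (inbranch)
    badness += 8192;
  /* Let the target express a preference among otherwise usable clones.  */
  badness += target_badness * 512;
  return badness;
}
#endif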
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
static gimple *
vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  gcc_assert (op_type == TREE_CODE_LENGTH (code));
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> &vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code)
4536 tree vop0
, vop1
, new_tmp
, vec_dest
;
4538 vec_dest
= vec_dsts
.pop ();
4540 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4542 /* Create demotion operation. */
4543 vop0
= (*vec_oprnds
)[i
];
4544 vop1
= (*vec_oprnds
)[i
+ 1];
4545 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4546 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4547 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4548 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4551 /* Store the resulting vector for next recursive call. */
4552 (*vec_oprnds
)[i
/2] = new_tmp
;
4555 /* This is the last step of the conversion sequence. Store the
4556 vectors in SLP_NODE or in vector info of the scalar statement
4557 (or in STMT_VINFO_RELATED_STMT chain). */
4559 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4561 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4565 /* For multi-step demotion operations we first generate demotion operations
4566 from the source type to the intermediate types, and then combine the
4567 results (stored in VEC_OPRNDS) in demotion operation to the destination
4571 /* At each level of recursion we have half of the operands we had at the
4573 vec_oprnds
->truncate ((i
+1)/2);
4574 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4576 stmt_info
, vec_dsts
, gsi
,
4577 slp_node
, VEC_PACK_TRUNC_EXPR
);
4580 vec_dsts
.quick_push (vec_dest
);
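/* Illustrative sketch, not part of GCC: one step of the recursive narrowing
   performed above.  Two source vectors are packed into one narrower
   destination with VEC_PACK_TRUNC_EXPR and the routine then recurses on the
   halved operand list.  Only APIs already used in this file appear here; the
   surrounding driver logic is omitted.  */
#if 0
static tree
sketch_pack_trunc_step (vec_info *vinfo, stmt_vec_info stmt_info,
			gimple_stmt_iterator *gsi,
			tree narrow_vec_dest, tree vop0, tree vop1)
{
  /* narrow_dest = VEC_PACK_TRUNC_EXPR <vop0, vop1>;  */
  gassign *g = gimple_build_assign (narrow_vec_dest, VEC_PACK_TRUNC_EXPR,
				    vop0, vop1);
  tree new_tmp = make_ssa_name (narrow_vec_dest, g);
  gimple_assign_set_lhs (g, new_tmp);
  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
  return new_tmp;
}
#endif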
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec_info *vinfo,
					vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, int op_type)
4599 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4600 gimple
*new_stmt1
, *new_stmt2
;
4601 vec
<tree
> vec_tmp
= vNULL
;
4603 vec_tmp
.create (vec_oprnds0
->length () * 2);
4604 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4606 if (op_type
== binary_op
)
4607 vop1
= (*vec_oprnds1
)[i
];
4611 /* Generate the two halves of promotion operation. */
4612 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4613 op_type
, vec_dest
, gsi
,
4615 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4616 op_type
, vec_dest
, gsi
,
4618 if (is_gimple_call (new_stmt1
))
4620 new_tmp1
= gimple_call_lhs (new_stmt1
);
4621 new_tmp2
= gimple_call_lhs (new_stmt2
);
4625 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4626 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4629 /* Store the results for the next step. */
4630 vec_tmp
.quick_push (new_tmp1
);
4631 vec_tmp
.quick_push (new_tmp2
);
4634 vec_oprnds0
->release ();
4635 *vec_oprnds0
= vec_tmp
;
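/* Illustrative sketch, not part of GCC: what the pair of calls to
   vect_gen_widened_results_half above amounts to for a simple widening
   conversion, where CODE1/CODE2 are typically the lo/hi codes chosen by
   supportable_widening_operation (e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR).  Each input vector yields two output vectors, which
   is why the result list doubles in length per promotion step.  The helper
   name and the unary case shown here are assumptions for the example.  */
#if 0
static void
sketch_widen_one_input (vec_info *vinfo, stmt_vec_info stmt_info,
			gimple_stmt_iterator *gsi, tree vec_dest, tree vop0,
			vec<tree> *results)
{
  /* Low half:  dest_lo = VEC_UNPACK_LO_EXPR <vop0>;  */
  gimple *lo = vect_gen_widened_results_half (vinfo, VEC_UNPACK_LO_EXPR,
					      vop0, NULL_TREE, unary_op,
					      vec_dest, gsi, stmt_info);
  /* High half: dest_hi = VEC_UNPACK_HI_EXPR <vop0>;  */
  gimple *hi = vect_gen_widened_results_half (vinfo, VEC_UNPACK_HI_EXPR,
					      vop0, NULL_TREE, unary_op,
					      vec_dest, gsi, stmt_info);
  results->safe_push (gimple_assign_lhs (lo));
  results->safe_push (gimple_assign_lhs (hi));
}
#endif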
4638 /* Create vectorized promotion stmts for widening stmts using only half the
4639 potential vector size for input. */
4641 vect_create_half_widening_stmts (vec_info
*vinfo
,
4642 vec
<tree
> *vec_oprnds0
,
4643 vec
<tree
> *vec_oprnds1
,
4644 stmt_vec_info stmt_info
, tree vec_dest
,
4645 gimple_stmt_iterator
*gsi
,
4646 enum tree_code code1
,
4654 vec
<tree
> vec_tmp
= vNULL
;
4656 vec_tmp
.create (vec_oprnds0
->length ());
4657 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4659 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
4661 gcc_assert (op_type
== binary_op
);
4662 vop1
= (*vec_oprnds1
)[i
];
4664 /* Widen the first vector input. */
4665 out_type
= TREE_TYPE (vec_dest
);
4666 new_tmp1
= make_ssa_name (out_type
);
4667 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
4668 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
4669 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
4671 /* Widen the second vector input. */
4672 new_tmp2
= make_ssa_name (out_type
);
4673 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
4674 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
4675 /* Perform the operation. With both vector inputs widened. */
4676 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, new_tmp2
);
4680 /* Perform the operation. With the single vector input widened. */
4681 new_stmt3
= gimple_build_assign (vec_dest
, code1
, new_tmp1
, vop1
);
4684 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
4685 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
4686 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
4688 /* Store the results for the next step. */
4689 vec_tmp
.quick_push (new_tmp3
);
4692 vec_oprnds0
->release ();
4693 *vec_oprnds0
= vec_tmp
;
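/* Illustrative sketch, not part of GCC: the case vect_create_half_widening_stmts
   handles.  When nunits_out == nunits_in the widening operation only consumes
   the low half of a full-width input, so instead of a lo/hi unpack pair each
   input is first widened with NOP_EXPR and the operation is then done at the
   wider element type.  The scalar analogue below is a schematic restatement
   of the sequence generated above, not new functionality.  */
#if 0
static long
sketch_half_widening_add (int a, int b)
{
  long wa = (long) a;	/* NOP_EXPR widening of the first input.  */
  long wb = (long) b;	/* NOP_EXPR widening of the second input.  */
  return wa + wb;	/* The operation performed at the wider type.  */
}
#endif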
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_conversion (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
4710 tree op0
, op1
= NULL_TREE
;
4711 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4712 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4713 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4715 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4717 poly_uint64 nunits_in
;
4718 poly_uint64 nunits_out
;
4719 tree vectype_out
, vectype_in
;
4721 tree lhs_type
, rhs_type
;
4722 enum { NARROW
, NONE
, WIDEN
} modifier
;
4723 vec
<tree
> vec_oprnds0
= vNULL
;
4724 vec
<tree
> vec_oprnds1
= vNULL
;
4726 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4727 int multi_step_cvt
= 0;
4728 vec
<tree
> interm_types
= vNULL
;
4729 tree intermediate_type
, cvt_type
= NULL_TREE
;
4731 unsigned short fltsz
;
4733 /* Is STMT a vectorizable conversion? */
4735 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4738 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4742 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4746 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4749 code
= gimple_assign_rhs_code (stmt
);
4750 if (!CONVERT_EXPR_CODE_P (code
)
4751 && code
!= FIX_TRUNC_EXPR
4752 && code
!= FLOAT_EXPR
4753 && code
!= WIDEN_PLUS_EXPR
4754 && code
!= WIDEN_MINUS_EXPR
4755 && code
!= WIDEN_MULT_EXPR
4756 && code
!= WIDEN_LSHIFT_EXPR
)
4759 bool widen_arith
= (code
== WIDEN_PLUS_EXPR
4760 || code
== WIDEN_MINUS_EXPR
4761 || code
== WIDEN_MULT_EXPR
4762 || code
== WIDEN_LSHIFT_EXPR
);
4763 op_type
= TREE_CODE_LENGTH (code
);
4765 /* Check types of lhs and rhs. */
4766 scalar_dest
= gimple_assign_lhs (stmt
);
4767 lhs_type
= TREE_TYPE (scalar_dest
);
4768 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4770 /* Check the operands of the operation. */
4771 slp_tree slp_op0
, slp_op1
= NULL
;
4772 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4773 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4775 if (dump_enabled_p ())
4776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4777 "use not simple.\n");
4781 rhs_type
= TREE_TYPE (op0
);
4782 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4783 && !((INTEGRAL_TYPE_P (lhs_type
)
4784 && INTEGRAL_TYPE_P (rhs_type
))
4785 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4786 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4789 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4790 && ((INTEGRAL_TYPE_P (lhs_type
)
4791 && !type_has_mode_precision_p (lhs_type
))
4792 || (INTEGRAL_TYPE_P (rhs_type
)
4793 && !type_has_mode_precision_p (rhs_type
))))
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4797 "type conversion to/from bit-precision unsupported."
4802 if (op_type
== binary_op
)
4804 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
4805 || code
== WIDEN_PLUS_EXPR
|| code
== WIDEN_MINUS_EXPR
);
4807 op1
= gimple_assign_rhs2 (stmt
);
4809 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4810 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4814 "use not simple.\n");
4817 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4820 vectype_in
= vectype1_in
;
4823 /* If op0 is an external or constant def, infer the vector type
4824 from the scalar type. */
4826 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4828 gcc_assert (vectype_in
);
4831 if (dump_enabled_p ())
4832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4833 "no vectype for scalar type %T\n", rhs_type
);
4838 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4839 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4841 if (dump_enabled_p ())
4842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4843 "can't convert between boolean and non "
4844 "boolean vectors %T\n", rhs_type
);
4849 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4850 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4851 if (known_eq (nunits_out
, nunits_in
))
4856 else if (multiple_p (nunits_out
, nunits_in
))
4860 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4864 /* Multiple types in SLP are handled by creating the appropriate number of
4865 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4869 else if (modifier
== NARROW
)
4870 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4872 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4874 /* Sanity check: make sure that at least one copy of the vectorized stmt
4875 needs to be generated. */
4876 gcc_assert (ncopies
>= 1);
4878 bool found_mode
= false;
4879 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4880 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4881 opt_scalar_mode rhs_mode_iter
;
4883 /* Supportable by target? */
4887 if (code
!= FIX_TRUNC_EXPR
4888 && code
!= FLOAT_EXPR
4889 && !CONVERT_EXPR_CODE_P (code
))
4891 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4895 if (dump_enabled_p ())
4896 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4897 "conversion not supported by target.\n");
4901 if (known_eq (nunits_in
, nunits_out
))
4903 if (!supportable_half_widening_operation (code
, vectype_out
,
4904 vectype_in
, &code1
))
4906 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4909 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
4910 vectype_out
, vectype_in
, &code1
,
4911 &code2
, &multi_step_cvt
,
4914 /* Binary widening operation can only be supported directly by the
4916 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4920 if (code
!= FLOAT_EXPR
4921 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4924 fltsz
= GET_MODE_SIZE (lhs_mode
);
4925 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4927 rhs_mode
= rhs_mode_iter
.require ();
4928 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4932 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4933 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4934 if (cvt_type
== NULL_TREE
)
4937 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4939 if (!supportable_convert_operation (code
, vectype_out
,
4940 cvt_type
, &codecvt1
))
4943 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4944 vectype_out
, cvt_type
,
4945 &codecvt1
, &codecvt2
,
4950 gcc_assert (multi_step_cvt
== 0);
4952 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4954 vectype_in
, &code1
, &code2
,
4955 &multi_step_cvt
, &interm_types
))
4965 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4966 codecvt2
= ERROR_MARK
;
4970 interm_types
.safe_push (cvt_type
);
4971 cvt_type
= NULL_TREE
;
4976 gcc_assert (op_type
== unary_op
);
4977 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4978 &code1
, &multi_step_cvt
,
4982 if (code
!= FIX_TRUNC_EXPR
4983 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4987 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4988 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4989 if (cvt_type
== NULL_TREE
)
4991 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4994 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4995 &code1
, &multi_step_cvt
,
5004 if (!vec_stmt
) /* transformation not required. */
5007 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5008 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5010 if (dump_enabled_p ())
5011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5012 "incompatible vector types for invariants\n");
5015 DUMP_VECT_SCOPE ("vectorizable_conversion");
5016 if (modifier
== NONE
)
5018 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5019 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5022 else if (modifier
== NARROW
)
5024 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5025 /* The final packing step produces one vector result per copy. */
5026 unsigned int nvectors
5027 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5028 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5029 multi_step_cvt
, cost_vec
,
5034 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5035 /* The initial unpacking step produces two vector results
5036 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5037 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5038 unsigned int nvectors
5040 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5042 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5043 multi_step_cvt
, cost_vec
,
5046 interm_types
.release ();
5051 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_NOTE
, vect_location
,
5053 "transform conversion. ncopies = %d.\n", ncopies
);
5055 if (op_type
== binary_op
)
5057 if (CONSTANT_CLASS_P (op0
))
5058 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5059 else if (CONSTANT_CLASS_P (op1
))
5060 op1
= fold_convert (TREE_TYPE (op0
), op1
);
5063 /* In case of multi-step conversion, we first generate conversion operations
5064 to the intermediate types, and then from that types to the final one.
5065 We create vector destinations for the intermediate type (TYPES) received
5066 from supportable_*_operation, and store them in the correct order
5067 for future use in vect_create_vectorized_*_stmts (). */
5068 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5069 vec_dest
= vect_create_destination_var (scalar_dest
,
5070 (cvt_type
&& modifier
== WIDEN
)
5071 ? cvt_type
: vectype_out
);
5072 vec_dsts
.quick_push (vec_dest
);
5076 for (i
= interm_types
.length () - 1;
5077 interm_types
.iterate (i
, &intermediate_type
); i
--)
5079 vec_dest
= vect_create_destination_var (scalar_dest
,
5081 vec_dsts
.quick_push (vec_dest
);
5086 vec_dest
= vect_create_destination_var (scalar_dest
,
5088 ? vectype_out
: cvt_type
);
5093 if (modifier
== WIDEN
)
5095 else if (modifier
== NARROW
)
5098 ninputs
= vect_pow2 (multi_step_cvt
);
5106 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5108 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5110 /* Arguments are ready, create the new vector stmt. */
5111 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
5112 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
5113 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5114 gimple_assign_set_lhs (new_stmt
, new_temp
);
5115 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5118 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5120 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5125 /* In case the vectorization factor (VF) is bigger than the number
5126 of elements that we can fit in a vectype (nunits), we have to
5127 generate more than one vector stmt - i.e - we need to "unroll"
5128 the vector stmt by a factor VF/nunits. */
5129 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5131 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
5133 if (code
== WIDEN_LSHIFT_EXPR
)
5135 int oprnds_size
= vec_oprnds0
.length ();
5136 vec_oprnds1
.create (oprnds_size
);
5137 for (i
= 0; i
< oprnds_size
; ++i
)
5138 vec_oprnds1
.quick_push (op1
);
5140 /* Arguments are ready. Create the new vector stmts. */
5141 for (i
= multi_step_cvt
; i
>= 0; i
--)
5143 tree this_dest
= vec_dsts
[i
];
5144 enum tree_code c1
= code1
, c2
= code2
;
5145 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5150 if (known_eq (nunits_out
, nunits_in
))
5151 vect_create_half_widening_stmts (vinfo
, &vec_oprnds0
,
5152 &vec_oprnds1
, stmt_info
,
5156 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
5157 &vec_oprnds1
, stmt_info
,
5162 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5167 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5168 new_temp
= make_ssa_name (vec_dest
);
5169 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
5170 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5173 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5176 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5178 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5183 /* In case the vectorization factor (VF) is bigger than the number
5184 of elements that we can fit in a vectype (nunits), we have to
5185 generate more than one vector stmt - i.e - we need to "unroll"
5186 the vector stmt by a factor VF/nunits. */
5187 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
5189 /* Arguments are ready. Create the new vector stmts. */
5191 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5193 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5194 new_temp
= make_ssa_name (vec_dest
);
5196 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5197 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5198 vec_oprnds0
[i
] = new_temp
;
5201 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
5203 stmt_info
, vec_dsts
, gsi
,
5208 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5210 vec_oprnds0
.release ();
5211 vec_oprnds1
.release ();
5212 interm_types
.release ();
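/* Illustrative sketch, not part of GCC: the copy-count arithmetic used in
   vectorizable_conversion above, with plain integers standing in for
   poly_uint64.  For NARROW the count is based on the output vector type and
   for NONE/WIDEN on the input vector type; in addition, multi-step narrowing
   consumes 2^multi_step_cvt input vectors per copy (the vect_pow2 call
   above).  The helper name is an assumption for the example.  */
#if 0
static unsigned
sketch_conversion_ncopies (unsigned vf, unsigned nunits_in,
			   unsigned nunits_out, bool narrow)
{
  /* "Unroll" the vector stmt by a factor VF/nunits of the chosen type.  */
  return narrow ? vf / nunits_out : vf / nunits_in;
}
#endif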
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
5257 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5259 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5263 vec
<tree
> vec_oprnds
= vNULL
;
5265 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5266 enum tree_code code
;
5269 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5272 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5276 /* Is vectorizable assignment? */
5277 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5281 scalar_dest
= gimple_assign_lhs (stmt
);
5282 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5285 if (STMT_VINFO_DATA_REF (stmt_info
))
5288 code
= gimple_assign_rhs_code (stmt
);
5289 if (!(gimple_assign_single_p (stmt
)
5290 || code
== PAREN_EXPR
5291 || CONVERT_EXPR_CODE_P (code
)))
5294 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5295 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5297 /* Multiple types in SLP are handled by creating the appropriate number of
5298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5303 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5305 gcc_assert (ncopies
>= 1);
5308 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5309 &dt
[0], &vectype_in
))
5311 if (dump_enabled_p ())
5312 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5313 "use not simple.\n");
5317 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5319 /* We can handle NOP_EXPR conversions that do not change the number
5320 of elements or the vector size. */
5321 if ((CONVERT_EXPR_CODE_P (code
)
5322 || code
== VIEW_CONVERT_EXPR
)
5324 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5325 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5326 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5329 if (VECTOR_BOOLEAN_TYPE_P (vectype
)
5330 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5334 "can't convert between boolean and non "
5335 "boolean vectors %T\n", TREE_TYPE (op
));
5340 /* We do not handle bit-precision changes. */
5341 if ((CONVERT_EXPR_CODE_P (code
)
5342 || code
== VIEW_CONVERT_EXPR
)
5343 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5344 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5345 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5346 /* But a conversion that does not change the bit-pattern is ok. */
5347 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5348 > TYPE_PRECISION (TREE_TYPE (op
)))
5349 && TYPE_UNSIGNED (TREE_TYPE (op
))))
5351 if (dump_enabled_p ())
5352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5353 "type conversion to/from bit-precision "
5358 if (!vec_stmt
) /* transformation not required. */
5361 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5365 "incompatible vector types for invariants\n");
5368 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5369 DUMP_VECT_SCOPE ("vectorizable_assignment");
5370 if (!vect_nop_conversion_p (stmt_info
))
5371 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5377 if (dump_enabled_p ())
5378 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5381 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5384 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5386 /* Arguments are ready. create the new vector stmt. */
5387 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5389 if (CONVERT_EXPR_CODE_P (code
)
5390 || code
== VIEW_CONVERT_EXPR
)
5391 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5392 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5393 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5394 gimple_assign_set_lhs (new_stmt
, new_temp
);
5395 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5397 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5399 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5402 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5404 vec_oprnds
.release ();
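/* Illustrative sketch, not part of GCC: the per-operand code generation in
   vectorizable_assignment above.  A same-size conversion is expressed as a
   VIEW_CONVERT_EXPR of the vectorized operand to the destination vector
   type, then copied into a fresh SSA name.  Only APIs already used in this
   file appear here; the helper name is an assumption for the example.  */
#if 0
static tree
sketch_vectorized_copy (vec_info *vinfo, stmt_vec_info stmt_info,
			gimple_stmt_iterator *gsi, tree vec_dest,
			tree vectype, tree vop, bool is_conversion)
{
  if (is_conversion)
    /* Reinterpret the bits in the destination vector type.  */
    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
  gassign *new_stmt = gimple_build_assign (vec_dest, vop);
  tree new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
  return new_temp;
}
#endif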
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
5460 tree op0
, op1
= NULL
;
5461 tree vec_oprnd1
= NULL_TREE
;
5463 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5464 enum tree_code code
;
5465 machine_mode vec_mode
;
5469 machine_mode optab_op2_mode
;
5470 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5472 poly_uint64 nunits_in
;
5473 poly_uint64 nunits_out
;
5478 vec
<tree
> vec_oprnds0
= vNULL
;
5479 vec
<tree
> vec_oprnds1
= vNULL
;
5482 bool scalar_shift_arg
= true;
5483 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5484 bool incompatible_op1_vectype_p
= false;
5486 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5489 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5490 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5494 /* Is STMT a vectorizable binary/unary operation? */
5495 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5499 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5502 code
= gimple_assign_rhs_code (stmt
);
5504 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5505 || code
== RROTATE_EXPR
))
5508 scalar_dest
= gimple_assign_lhs (stmt
);
5509 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5510 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5512 if (dump_enabled_p ())
5513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5514 "bit-precision shifts not supported.\n");
5519 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5520 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5522 if (dump_enabled_p ())
5523 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5524 "use not simple.\n");
5527 /* If op0 is an external or constant def, infer the vector type
5528 from the scalar type. */
5530 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5532 gcc_assert (vectype
);
5535 if (dump_enabled_p ())
5536 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5537 "no vectype for scalar type\n");
5541 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5542 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5543 if (maybe_ne (nunits_out
, nunits_in
))
5546 stmt_vec_info op1_def_stmt_info
;
5548 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5549 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5551 if (dump_enabled_p ())
5552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5553 "use not simple.\n");
5557 /* Multiple types in SLP are handled by creating the appropriate number of
5558 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5563 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5565 gcc_assert (ncopies
>= 1);
5567 /* Determine whether the shift amount is a vector, or scalar. If the
5568 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5570 if ((dt
[1] == vect_internal_def
5571 || dt
[1] == vect_induction_def
5572 || dt
[1] == vect_nested_cycle
)
5574 scalar_shift_arg
= false;
5575 else if (dt
[1] == vect_constant_def
5576 || dt
[1] == vect_external_def
5577 || dt
[1] == vect_internal_def
)
5579 /* In SLP, need to check whether the shift count is the same,
5580 in loops if it is a constant or invariant, it is always
5584 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5585 stmt_vec_info slpstmt_info
;
5587 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5589 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5590 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5591 scalar_shift_arg
= false;
5594 /* For internal SLP defs we have to make sure we see scalar stmts
5595 for all vector elements.
5596 ??? For different vectors we could resort to a different
5597 scalar shift operand but code-generation below simply always
5599 if (dt
[1] == vect_internal_def
5600 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5602 scalar_shift_arg
= false;
5605 /* If the shift amount is computed by a pattern stmt we cannot
5606 use the scalar amount directly thus give up and use a vector
5608 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5609 scalar_shift_arg
= false;
5613 if (dump_enabled_p ())
5614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5615 "operand mode requires invariant argument.\n");
5619 /* Vector shifted by vector. */
5620 bool was_scalar_shift_arg
= scalar_shift_arg
;
5621 if (!scalar_shift_arg
)
5623 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5624 if (dump_enabled_p ())
5625 dump_printf_loc (MSG_NOTE
, vect_location
,
5626 "vector/vector shift/rotate found.\n");
5629 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5631 incompatible_op1_vectype_p
5632 = (op1_vectype
== NULL_TREE
5633 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5634 TYPE_VECTOR_SUBPARTS (vectype
))
5635 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5636 if (incompatible_op1_vectype_p
5638 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5639 || slp_op1
->refcnt
!= 1))
5641 if (dump_enabled_p ())
5642 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5643 "unusable type for last operand in"
5644 " vector/vector shift/rotate.\n");
5648 /* See if the machine has a vector shifted by scalar insn and if not
5649 then see if it has a vector shifted by vector insn. */
5652 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5654 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5656 if (dump_enabled_p ())
5657 dump_printf_loc (MSG_NOTE
, vect_location
,
5658 "vector/scalar shift/rotate found.\n");
5662 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5664 && (optab_handler (optab
, TYPE_MODE (vectype
))
5665 != CODE_FOR_nothing
))
5667 scalar_shift_arg
= false;
5669 if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
						   slp_op1);

      /* Unlike the other binary operators, shifts/rotates have
	 the rhs being int, instead of the same type as the lhs,
	 so make sure the scalar is the right type if we are
	 dealing with vectors of long long/long/short/char.  */
      incompatible_op1_vectype_p
	= (!op1_vectype
	   || !tree_nop_conversion_p (TREE_TYPE (vectype),
				      TREE_TYPE (op1)));
      if (incompatible_op1_vectype_p
	  && dt[1] == vect_internal_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      return false;
    }
  /* vector lowering cannot optimize vector shifts using word arithmetic.  */
  if (vect_emulated_vector_p (vectype))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
		  && (!incompatible_op1_vectype_p
		      || dt[1] == vect_constant_def)
		  && !vect_maybe_update_slp_op_vectype
			(slp_op1,
			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      /* Now adjust the constant shift amount in place.  */
      if (slp_node
	  && incompatible_op1_vectype_p
	  && dt[1] == vect_constant_def)
	{
	  for (unsigned i = 0;
	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
	    {
	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
		= fold_convert (TREE_TYPE (vectype),
				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
			   == INTEGER_CST));
	    }
	}
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  if (incompatible_op1_vectype_p && !slp_node)
    {
      gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
      op1 = fold_convert (TREE_TYPE (vectype), op1);
      if (dt[1] != vect_constant_def)
	op1 = vect_init_vector (vinfo, stmt_info, op1,
				TREE_TYPE (vectype), NULL);
    }

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  if (scalar_shift_arg && dt[1] != vect_internal_def)
    {
      /* Vector shl and shr insn patterns can be defined with scalar
	 operand 2 (shift operand).  In this case, use constant or loop
	 invariant op1 directly, without extending it to vector mode
	 first.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (!VECTOR_MODE_P (optab_op2_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "operand 1 using scalar mode.\n");
	  vec_oprnd1 = op1;
	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
	  vec_oprnds1.quick_push (vec_oprnd1);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
	  for (k = 0;
	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1);
	       k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
    }
  else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
    {
      if (was_scalar_shift_arg)
	{
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
	  op1 = fold_convert (TREE_TYPE (vectype), op1);
	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
				  !loop_vinfo ? gsi : NULL);
	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
					 !loop_vinfo ? gsi : NULL);
	  vec_oprnds1.create (slp_node->vec_stmts_size);
	  for (k = 0; k < slp_node->vec_stmts_size; k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
      else if (dt[1] == vect_constant_def)
	/* The constant shift amount has been adjusted in place.  */
	;
      else
	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
    }

  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0,
		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
      if (scalar_shift_arg && dt[1] == vect_internal_def)
	{
	  vop1 = vec_oprnds1[0];
	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
	  gassign *new_stmt
	    = gimple_build_assign (new_temp,
				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
					   vop1,
					   TYPE_SIZE (TREE_TYPE (new_temp)),
					   bitsize_zero_node));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  vop1 = new_temp;
	}
      else
	vop1 = vec_oprnds1[i];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
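/* Illustrative sketch (not part of the vectorizer, not compiled): the two
   shapes of shift that vectorizable_shift above distinguishes.  In the
   first loop the shift amount is loop-invariant, so a target shift pattern
   taking a scalar operand 2 can be used (the "operand 1 using scalar mode"
   path); in the second loop every element has its own shift amount, so a
   true vector/vector shift is required.  Function and parameter names are
   made up for the example.  */
#if 0
void
example_scalar_shift_arg (int *a, int n, int s)
{
  /* Vectorizes with a single scalar shift amount S.  */
  for (int i = 0; i < n; i++)
    a[i] = a[i] << s;
}

void
example_vector_shift_arg (int *a, const int *s, int n)
{
  /* Needs a vector/vector shift: one shift amount per element.  */
  for (int i = 0; i < n; i++)
    a[i] = a[i] << s[i];
}
#endif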
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies, vec_num;
  int i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  /* Loads and stores are handled in vectorizable_{load,store}.  */
  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* Shifts are handled in vectorizable_shift.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Comparisons are handled in vectorizable_comparison.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    return false;

  /* Conditions are handled in vectorizable_condition.  */
  if (code == COND_EXPR)
    return false;

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
  if (!mask_op_p
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exceptions are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  slp_tree slp_op0;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &op0, &slp_op0, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
					       slp_node);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n",
			 TREE_TYPE (op0));
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
  slp_tree slp_op1 = NULL, slp_op2 = NULL;
  if (op_type == binary_op || op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       1, &op1, &slp_op1, &dt[1], &vectype2))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       2, &op2, &slp_op2, &dt[2], &vectype3))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vec_num = 1;
    }

  gcc_assert (ncopies >= 1);

  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);
    }

  bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
      using_emulated_vectors_p = true;
    }

  if (using_emulated_vectors_p
      && !vect_can_vectorize_without_simd_p (code))
    {
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE, "using word mode not possible.\n");
      return false;
    }
  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  if (!vec_stmt) /* transformation not required.  */
    {
      /* If this operation is part of a reduction, a fully-masked loop
	 should only change the active lanes of the reduction chain,
	 keeping the inactive lanes as-is.  */
      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && reduc_idx >= 0)
	{
	  if (cond_fn == IFN_LAST
	      || !direct_internal_fn_supported_p (cond_fn, vectype,
						  OPTIMIZE_FOR_SPEED))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else
	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
				   vectype, NULL);
	}

      /* Put types on constant and invariant SLP children.  */
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (using_emulated_vectors_p)
	{
	  /* The above vect_model_simple_cost call handles constants
	     in the prologue and (mis-)costs one of the stmts as
	     vector stmt.  See tree-vect-generic.c:do_plus_minus/do_negate
	     for the actual lowering that will be applied.  */
	  unsigned n
	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
	  switch (code)
	    {
	    case PLUS_EXPR:
	      n *= 5;
	      break;
	    case MINUS_EXPR:
	      n *= 6;
	      break;
	    case NEGATE_EXPR:
	      n *= 4;
	      break;
	    default:;
	    }
	  record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      gimple *new_stmt = NULL;
      vop1 = ((op_type == binary_op || op_type == ternary_op)
	      ? vec_oprnds1[i] : NULL_TREE);
      vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
      if (masked_loop_p && reduc_idx >= 0)
	{
	  /* Perform the operation on active elements only and take
	     inactive elements from the reduction chain input.  */
	  gcc_assert (!vop2);
	  vop2 = reduc_idx == 1 ? vop1 : vop0;
	  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
					  vectype, i);
	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
						    vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, call);
	  gimple_call_set_lhs (call, new_temp);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  new_stmt = call;
	}
      else
	{
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info,
					   new_stmt, gsi);
	    }
	}
      if (slp_node)
	SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
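/* Illustrative sketch (not compiled): what the "unrolling" described in the
   comment inside vectorizable_operation above looks like in generated code.
   With VF=16 and 4-element vectors, the single scalar statement z = x + 1
   becomes four vector statements VS2_0..VS2_3, each consuming the
   corresponding vector def VS1_0..VS1_3 of the load.  The names and the
   v4si type are made up for the example; v4si uses GCC's generic vector
   extension.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

void
example_unrolled_copies (int *restrict z, const int *restrict x)
{
  /* VS1_0..VS1_3: four vector loads covering 16 scalar iterations.  */
  v4si vx0 = *(const v4si *) (x + 0);
  v4si vx1 = *(const v4si *) (x + 4);
  v4si vx2 = *(const v4si *) (x + 8);
  v4si vx3 = *(const v4si *) (x + 12);

  /* VS2_0..VS2_3: four copies of the vectorized "x + 1" statement.  */
  *(v4si *) (z + 0) = vx0 + 1;
  *(v4si *) (z + 4) = vx1 + 1;
  *(v4si *) (z + 8) = vx2 + 1;
  *(v4si *) (z + 12) = vx3 + 1;
}
#endif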
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}
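/* Illustrative sketch (not compiled): the source-level effect of
   ensure_base_align above.  Promoting the alignment of the underlying
   declaration is roughly equivalent to the user having written an aligned
   attribute on it, which lets the vectorized loop use aligned vector
   accesses to the buffer.  The names and the 32-byte figure are made up for
   the example; the real value comes from DR_TARGET_ALIGNMENT.  */
#if 0
/* Before: default alignment of the element type.  */
static float example_buf_before[1024];

/* After: as if the declaration had been given the target's preferred
   vector alignment.  */
static float example_buf_after[1024] __attribute__ ((aligned (32)));
#endif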
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
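/* Illustrative sketch (not compiled): a store group whose members have
   different alias sets, which makes get_group_alias_ptr_type above fall
   back to ptr_type_node ("may alias anything") for the whole group.  The
   struct and function names are made up for the example.  */
#if 0
struct example_pair { int i; float f; };

void
example_mixed_group_store (struct example_pair *p, int n)
{
  for (int j = 0; j < n; j++)
    {
      p[j].i = j;        /* alias set of int.    */
      p[j].f = 1.0f;     /* alias set of float.  */
    }
}
#endif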
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  for (int i = 0; i < 2; ++i)
    {
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
				     &offset[i], &mode, &unsignedp,
				     &reversep, &volatilep);
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
	return false;
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	    {
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
		return false;
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);
	    }
	}
    }

  if (!operand_equal_p (base[0], base[1], 0))
    return false;
  if (maybe_ne (bitsize[0], bitsize[1]))
    return false;
  if (offset[0] != offset[1])
    {
      if (!offset[0] || !offset[1])
	return false;
      if (!operand_equal_p (offset[0], offset[1], 0))
	{
	  tree step[2];
	  for (int i = 0; i < 2; ++i)
	    {
	      step[i] = integer_one_node;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
		    }
		}
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		{
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
		}
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
		}
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
	      if (rhs1
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
		offset[i] = rhs1;
	    }
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
	    return false;
	}
    }
  return true;
}
enum scan_store_kind {
  /* Normal permutation.  */
  scan_store_kind_perm,

  /* Whole vector left shift permutation with zero init.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
  scan_store_kind_lshift_cond
};

/* Function scan_store_can_perm_p.

   Verify if we can perform the needed permutations or whole vector shifts.
   Return -1 on failure, otherwise exact log2 of vectype's nunits.
   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
   to do at each step.  */

static int
scan_store_can_perm_p (tree vectype, tree init,
		       vec<enum scan_store_kind> *use_whole_vector = NULL)
{
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
  if (units_log2 <= 0)
    return -1;

  int i;
  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
  for (i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      enum scan_store_kind kind = scan_store_kind_perm;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	{
	  for (j = 0; j < nunits; ++j)
	    sel[j] = nunits - 1;
	}
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!can_vec_perm_const_p (vec_mode, indices))
	{
	  if (i == units_log2)
	    return -1;

	  if (whole_vector_shift_kind == scan_store_kind_perm)
	    {
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
		return -1;
	      whole_vector_shift_kind = scan_store_kind_lshift_zero;
	      /* Whole vector shifts shift in zeros, so if init is all zero
		 constant, there is no need to do anything further.  */
	      if ((TREE_CODE (init) != INTEGER_CST
		   && TREE_CODE (init) != REAL_CST)
		  || !initializer_zerop (init))
		{
		  tree masktype = truth_type_for (vectype);
		  if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
		    return -1;
		  whole_vector_shift_kind = scan_store_kind_lshift_cond;
		}
	    }
	  kind = whole_vector_shift_kind;
	}
      if (use_whole_vector)
	{
	  if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
	}
    }

  return units_log2;
}
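/* Illustrative sketch (not compiled): the log2(nunits) shift-and-add
   pattern whose permutations scan_store_can_perm_p above checks for, shown
   for an 8-element inclusive prefix sum using GCC's generic vector
   extension.  Each step pulls the running vector down by 2^i lanes, filling
   the vacated lanes from an all-zero initializer vector, and adds it back
   in.  The names and the v8si type are made up for the example.  */
#if 0
typedef int v8si __attribute__ ((vector_size (32)));

static v8si
example_inclusive_scan (v8si v)
{
  const v8si zero = { 0, 0, 0, 0, 0, 0, 0, 0 };

  /* Step 0: shift by 1 lane and accumulate.  */
  v += __builtin_shuffle (zero, v, (v8si) { 0, 8, 9, 10, 11, 12, 13, 14 });
  /* Step 1: shift by 2 lanes and accumulate.  */
  v += __builtin_shuffle (zero, v, (v8si) { 0, 1, 8, 9, 10, 11, 12, 13 });
  /* Step 2: shift by 4 lanes and accumulate.  */
  v += __builtin_shuffle (zero, v, (v8si) { 0, 1, 2, 3, 8, 9, 10, 11 });

  /* v now holds { x0, x0+x1, ..., x0+...+x7 }.  */
  return v;
}
#endif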
/* Function check_scan_store.

   Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */

static bool
check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
		  enum vect_def_type rhs_dt, bool slp, tree mask,
		  vect_memory_access_type memory_access_type)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type;

  gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
  if (slp
      || mask
      || memory_access_type != VMAT_CONTIGUOUS
      || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
      || loop_vinfo == NULL
      || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
      || STMT_VINFO_GROUPED_ACCESS (stmt_info)
      || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
      || !integer_zerop (DR_INIT (dr_info->dr))
      || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
      || !alias_sets_conflict_p (get_alias_set (vectype),
				 get_alias_set (TREE_TYPE (ref_type))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan store.\n");
      return false;
    }

  /* We need to pattern match code built by OpenMP lowering and simplified
     by following optimizations into something we can handle.
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
	 r += something ();
	 #pragma omp scan inclusive (r)
	 use (r);
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       D.2042[_21] = 0;
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       _6 = _4 + r.0_5;
       D.2042[_20] = _6;
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
       // Actual scan phase:
       ...
       r.1_8 = D.2042[_20];
       ...
     The "omp simd array" variable D.2042 holds the privatized copy used
     inside of the loop and D.2043 is another one that holds copies of
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++ish user
     defined reduction, but allow the vectorizer to pattern recognize it
     and turn into the appropriate vectorized scan.

     For exclusive scan, this is slightly different:
     #pragma omp simd reduction(inscan,+:r)
     for (...)
       {
	 use (r);
	 #pragma omp scan exclusive (r)
	 r += something ();
       }
     shall have body with:
       // Initialization for input phase, store the reduction initializer:
       _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
       _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
       D.2042[_21] = 0;
       // Actual input phase:
       ...
       r.0_5 = D.2042[_20];
       _6 = _4 + r.0_5;
       D.2042[_20] = _6;
       // Initialization for scan phase:
       _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
       _26 = D.2043[_25];
       D.2044[_25] = _26;
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       // Actual scan phase:
       ...
       r.1_8 = D.2044[_20];
       ...  */

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
    {
      /* Match the D.2042[_21] = 0; store above.  Just require that
	 it is a constant or external definition store.  */
      if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
	{
	 fail_init:
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported OpenMP scan initializer store.\n");
	  return false;
	}

      if (! loop_vinfo->scan_map)
	loop_vinfo->scan_map = new hash_map<tree, tree>;
      tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
      tree &cached = loop_vinfo->scan_map->get_or_insert (var);
      if (cached)
	goto fail_init;
      cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));

      /* These stores can be vectorized normally.  */
      return true;
    }

  if (rhs_dt != vect_internal_def)
    {
     fail:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported OpenMP scan combiner pattern.\n");
      return false;
    }

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (rhs) != SSA_NAME)
    goto fail;

  gimple *other_store_stmt = NULL;
  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      if (!inscan_var_store)
	{
	  use_operand_p use_p;
	  imm_use_iterator iter;
	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	    {
	      gimple *use_stmt = USE_STMT (use_p);
	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
		continue;
	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
		  || !is_gimple_assign (use_stmt)
		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
		  || other_store_stmt
		  || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
		goto fail;
	      other_store_stmt = use_stmt;
	    }
	  if (other_store_stmt == NULL)
	    goto fail;
	  rhs = gimple_assign_lhs (other_store_stmt);
	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
	    goto fail;
	}
    }
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
    {
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  if (other_store_stmt)
	    goto fail;
	  other_store_stmt = use_stmt;
	}
    }
  else
    goto fail;

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  if (gimple_bb (def_stmt) != gimple_bb (stmt)
      || !is_gimple_assign (def_stmt)
      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
    goto fail;

  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  /* For pointer addition, we should use the normal plus for the vector
     operation.  */
  switch (code)
    {
    case POINTER_PLUS_EXPR:
      code = PLUS_EXPR;
      break;
    case MULT_HIGHPART_EXPR:
      goto fail;
    default:
      break;
    }
  if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
    goto fail;

  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
    goto fail;

  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load1_stmt)
      || gimple_bb (load2_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load2_stmt))
    goto fail;

  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  if (load1_stmt_info == NULL
      || load2_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    goto fail;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
    {
      dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
	goto fail;
      tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	goto fail;

      tree lrhs = rhs1;
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
	    continue;
	  if (other_store_stmt)
	    goto fail;
	  other_store_stmt = use_stmt;
	}
    }

  if (other_store_stmt == NULL)
    goto fail;
  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
      || !gimple_store_p (other_store_stmt))
    goto fail;

  stmt_vec_info other_store_stmt_info
    = loop_vinfo->lookup_stmt (other_store_stmt);
  if (other_store_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    goto fail;

  gimple *stmt1 = stmt;
  gimple *stmt2 = other_store_stmt;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    std::swap (stmt1, stmt2);
  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
			    gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
			     gimple_assign_rhs1 (load1_stmt)))
    goto fail;

  tree var3 = NULL_TREE;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
				gimple_assign_rhs1 (load2_stmt)))
    goto fail;
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
	goto fail;
      var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan exclusive",
			       DECL_ATTRIBUTES (var3)))
	goto fail;
    }

  dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
  if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
    goto fail;

  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
    goto fail;

  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    std::swap (var1, var2);

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      if (!lookup_attribute ("omp simd inscan exclusive",
			     DECL_ATTRIBUTES (var1)))
	goto fail;
      var1 = var3;
    }

  if (loop_vinfo->scan_map == NULL)
    goto fail;
  tree *init = loop_vinfo->scan_map->get (var1);
  if (init == NULL)
    goto fail;

  /* The IL is as expected, now check if we can actually vectorize it.
       _26 = D.2043[_25];
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
       D.2042[_25] = _28;
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = _31 + _32;
       // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
       _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _35 = _33 + _34;
       // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
       _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _37 = _35 + _36;
       // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
       _38 = _30 + _37;
       _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2043] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _38;
     For exclusive scan:
       _26 = D.2043[_25];
       D.2044[_25] = _26;
       _27 = D.2042[_25];
       _28 = _26 + _27;
       D.2043[_25] = _28;
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
       _30 = MEM <vector(8) int> [(int *)&D.2043];
       _31 = MEM <vector(8) int> [(int *)&D.2042];
       _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
       _34 = _32 + _33;
       // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
       //         _31[3]+_31[4], ... _31[5]+.._31[6] };
       _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
       _36 = _34 + _35;
       // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
       _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
       _38 = _36 + _37;
       // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
       //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
       _39 = _30 + _38;
       _50 = _31 + _39;
       _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
       MEM <vector(8) int> [(int *)&D.2044] = _39;
       MEM <vector(8) int> [(int *)&D.2042] = _51;  */
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    goto fail;

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
    goto fail;

  return true;
}
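/* Illustrative sketch (not compiled): the user-level OpenMP 5.0 source that,
   after OpenMP lowering, produces the "magic" GOMP_SIMD_LANE stores and
   loads that check_scan_store above pattern matches.  The function names
   are made up for the example.  */
#if 0
void
example_inclusive_scan_loop (int *a, int *b, int n)
{
  int r = 0;
  #pragma omp simd reduction (inscan, +:r)
  for (int i = 0; i < n; i++)
    {
      r += a[i];               /* input phase  */
      #pragma omp scan inclusive (r)
      b[i] = r;                /* scan phase   */
    }
}

void
example_exclusive_scan_loop (int *a, int *b, int n)
{
  int r = 0;
  #pragma omp simd reduction (inscan, +:r)
  for (int i = 0; i < n; i++)
    {
      b[i] = r;                /* scan phase   */
      #pragma omp scan exclusive (r)
      r += a[i];               /* input phase  */
    }
}
#endif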
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */

static bool
6979 vectorizable_scan_store (vec_info
*vinfo
,
6980 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6981 gimple
**vec_stmt
, int ncopies
)
6983 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6984 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6985 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
6986 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6988 if (dump_enabled_p ())
6989 dump_printf_loc (MSG_NOTE
, vect_location
,
6990 "transform scan store. ncopies = %d\n", ncopies
);
6992 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6993 tree rhs
= gimple_assign_rhs1 (stmt
);
6994 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
6996 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6997 bool inscan_var_store
6998 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7000 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7002 use_operand_p use_p
;
7003 imm_use_iterator iter
;
7004 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7006 gimple
*use_stmt
= USE_STMT (use_p
);
7007 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7009 rhs
= gimple_assign_lhs (use_stmt
);
7014 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7015 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
7016 if (code
== POINTER_PLUS_EXPR
)
7018 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
7019 && commutative_tree_code (code
));
7020 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
7021 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
7022 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
7023 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
7024 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
7025 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
7026 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
7027 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
7028 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
7029 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
7030 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
7032 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
7034 std::swap (rhs1
, rhs2
);
7035 std::swap (var1
, var2
);
7036 std::swap (load1_dr_info
, load2_dr_info
);
7039 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
7042 unsigned HOST_WIDE_INT nunits
;
7043 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7045 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
7046 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
7047 gcc_assert (units_log2
> 0);
7048 auto_vec
<tree
, 16> perms
;
7049 perms
.quick_grow (units_log2
+ 1);
7050 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
7051 for (int i
= 0; i
<= units_log2
; ++i
)
7053 unsigned HOST_WIDE_INT j
, k
;
7054 vec_perm_builder
sel (nunits
, nunits
, 1);
7055 sel
.quick_grow (nunits
);
7056 if (i
== units_log2
)
7057 for (j
= 0; j
< nunits
; ++j
)
7058 sel
[j
] = nunits
- 1;
7061 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7063 for (k
= 0; j
< nunits
; ++j
, ++k
)
7064 sel
[j
] = nunits
+ k
;
7066 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7067 if (!use_whole_vector
.is_empty ()
7068 && use_whole_vector
[i
] != scan_store_kind_perm
)
7070 if (zero_vec
== NULL_TREE
)
7071 zero_vec
= build_zero_cst (vectype
);
7072 if (masktype
== NULL_TREE
7073 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7074 masktype
= truth_type_for (vectype
);
7075 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
7078 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
7081 tree vec_oprnd1
= NULL_TREE
;
7082 tree vec_oprnd2
= NULL_TREE
;
7083 tree vec_oprnd3
= NULL_TREE
;
7084 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
7085 tree dataref_offset
= build_int_cst (ref_type
, 0);
7086 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
7087 vectype
, VMAT_CONTIGUOUS
);
7088 tree ldataref_ptr
= NULL_TREE
;
7089 tree orig
= NULL_TREE
;
7090 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
7091 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
7092 auto_vec
<tree
> vec_oprnds1
;
7093 auto_vec
<tree
> vec_oprnds2
;
7094 auto_vec
<tree
> vec_oprnds3
;
7095 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
7096 *init
, &vec_oprnds1
,
7097 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
7098 rhs2
, &vec_oprnds3
);
7099 for (int j
= 0; j
< ncopies
; j
++)
7101 vec_oprnd1
= vec_oprnds1
[j
];
7102 if (ldataref_ptr
== NULL
)
7103 vec_oprnd2
= vec_oprnds2
[j
];
7104 vec_oprnd3
= vec_oprnds3
[j
];
7107 else if (!inscan_var_store
)
7108 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7112 vec_oprnd2
= make_ssa_name (vectype
);
7113 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7114 unshare_expr (ldataref_ptr
),
7116 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
7117 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
7118 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7119 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7120 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7123 tree v
= vec_oprnd2
;
7124 for (int i
= 0; i
< units_log2
; ++i
)
7126 tree new_temp
= make_ssa_name (vectype
);
7127 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
7129 && (use_whole_vector
[i
]
7130 != scan_store_kind_perm
))
7131 ? zero_vec
: vec_oprnd1
, v
,
7133 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7134 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7135 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7137 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
7139 /* Whole vector shift shifted in zero bits, but if *init
7140 is not initializer_zerop, we need to replace those elements
7141 with elements from vec_oprnd1. */
7142 tree_vector_builder
vb (masktype
, nunits
, 1);
7143 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
7144 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
7145 ? boolean_false_node
: boolean_true_node
);
7147 tree new_temp2
= make_ssa_name (vectype
);
7148 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
7149 new_temp
, vec_oprnd1
);
7150 vect_finish_stmt_generation (vinfo
, stmt_info
,
7152 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7153 new_temp
= new_temp2
;
7156 /* For exclusive scan, perform the perms[i] permutation once
7159 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
7167 tree new_temp2
= make_ssa_name (vectype
);
7168 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
7169 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7170 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7175 tree new_temp
= make_ssa_name (vectype
);
7176 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
7177 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7178 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7180 tree last_perm_arg
= new_temp
;
7181 /* For exclusive scan, new_temp computed above is the exclusive scan
7182 prefix sum. Turn it into inclusive prefix sum for the broadcast
7183 of the last element into orig. */
7184 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7186 last_perm_arg
= make_ssa_name (vectype
);
7187 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
7188 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7189 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7192 orig
= make_ssa_name (vectype
);
7193 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
7194 last_perm_arg
, perms
[units_log2
]);
7195 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7196 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7198 if (!inscan_var_store
)
7200 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7201 unshare_expr (dataref_ptr
),
7203 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7204 g
= gimple_build_assign (data_ref
, new_temp
);
7205 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7206 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
7210 if (inscan_var_store
)
7211 for (int j
= 0; j
< ncopies
; j
++)
7214 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7216 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
7217 unshare_expr (dataref_ptr
),
7219 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
7220 gimple
*g
= gimple_build_assign (data_ref
, orig
);
7221 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
7222 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Function vectorizable_store.

   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
7237 vectorizable_store (vec_info
*vinfo
,
7238 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7239 gimple
**vec_stmt
, slp_tree slp_node
,
7240 stmt_vector_for_cost
*cost_vec
)
7244 tree vec_oprnd
= NULL_TREE
;
7246 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7247 class loop
*loop
= NULL
;
7248 machine_mode vec_mode
;
7250 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
7251 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7252 tree dataref_ptr
= NULL_TREE
;
7253 tree dataref_offset
= NULL_TREE
;
7254 gimple
*ptr_incr
= NULL
;
7257 stmt_vec_info first_stmt_info
;
7259 unsigned int group_size
, i
;
7260 vec
<tree
> oprnds
= vNULL
;
7261 vec
<tree
> result_chain
= vNULL
;
7262 vec
<tree
> vec_oprnds
= vNULL
;
7263 bool slp
= (slp_node
!= NULL
);
7264 unsigned int vec_num
;
7265 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
7267 gather_scatter_info gs_info
;
7269 vec_load_store_type vls_type
;
7272 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7275 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7279 /* Is vectorizable store? */
7281 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7282 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7284 tree scalar_dest
= gimple_assign_lhs (assign
);
7285 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
7286 && is_pattern_stmt_p (stmt_info
))
7287 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
7288 if (TREE_CODE (scalar_dest
) != ARRAY_REF
7289 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
7290 && TREE_CODE (scalar_dest
) != INDIRECT_REF
7291 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7292 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7293 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7294 && TREE_CODE (scalar_dest
) != MEM_REF
)
7299 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7300 if (!call
|| !gimple_call_internal_p (call
))
7303 internal_fn ifn
= gimple_call_internal_fn (call
);
7304 if (!internal_store_fn_p (ifn
))
7307 if (slp_node
!= NULL
)
7309 if (dump_enabled_p ())
7310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7311 "SLP of masked stores not supported.\n");
7315 int mask_index
= internal_fn_mask_index (ifn
);
7317 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
7318 &mask
, NULL
, &mask_dt
, &mask_vectype
))
7322 op
= vect_get_store_rhs (stmt_info
);
7324 /* Cannot have hybrid store SLP -- that would mean storing to the
7325 same location twice. */
7326 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7328 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7329 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7333 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7334 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7339 /* Multiple types in SLP are handled by creating the appropriate number of
7340 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7345 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7347 gcc_assert (ncopies
>= 1);
7349 /* FORNOW. This restriction should be relaxed. */
7350 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7352 if (dump_enabled_p ())
7353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7354 "multiple types in nested loop.\n");
7358 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7359 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7362 elem_type
= TREE_TYPE (vectype
);
7363 vec_mode
= TYPE_MODE (vectype
);
7365 if (!STMT_VINFO_DATA_REF (stmt_info
))
7368 vect_memory_access_type memory_access_type
;
7369 enum dr_alignment_support alignment_support_scheme
;
7372 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
7373 ncopies
, &memory_access_type
, &poffset
,
7374 &alignment_support_scheme
, &misalignment
, &gs_info
))
7379 if (memory_access_type
== VMAT_CONTIGUOUS
)
7381 if (!VECTOR_MODE_P (vec_mode
)
7382 || !can_vec_mask_load_store_p (vec_mode
,
7383 TYPE_MODE (mask_vectype
), false))
7386 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7387 && (memory_access_type
!= VMAT_GATHER_SCATTER
7388 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7390 if (dump_enabled_p ())
7391 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7392 "unsupported access type for masked store.\n");
7398 /* FORNOW. In some cases can vectorize even if data-type not supported
7399 (e.g. - array initialization with 0). */
7400 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7404 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7405 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7406 && memory_access_type
!= VMAT_GATHER_SCATTER
7407 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7410 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7411 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7412 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7416 first_stmt_info
= stmt_info
;
7417 first_dr_info
= dr_info
;
7418 group_size
= vec_num
= 1;
7421 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7423 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7424 memory_access_type
))
7428 if (!vec_stmt
) /* transformation not required. */
7430 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7433 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
7434 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, vls_type
,
7435 group_size
, memory_access_type
,
7439 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7442 if (dump_enabled_p ())
7443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7444 "incompatible vector types for invariants\n");
7448 if (dump_enabled_p ()
7449 && memory_access_type
!= VMAT_ELEMENTWISE
7450 && memory_access_type
!= VMAT_GATHER_SCATTER
7451 && alignment_support_scheme
!= dr_aligned
)
7452 dump_printf_loc (MSG_NOTE
, vect_location
,
7453 "Vectorizing an unaligned access.\n");
7455 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7456 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7457 memory_access_type
, alignment_support_scheme
,
7458 misalignment
, vls_type
, slp_node
, cost_vec
);
7461 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7465 ensure_base_align (dr_info
);
7467 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7469 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7470 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7471 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7472 tree ptr
, var
, scale
, vec_mask
;
7473 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7474 tree mask_halfvectype
= mask_vectype
;
7475 edge pe
= loop_preheader_edge (loop
);
7478 enum { NARROW
, NONE
, WIDEN
} modifier
;
7479 poly_uint64 scatter_off_nunits
7480 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7482 if (known_eq (nunits
, scatter_off_nunits
))
7484 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7488 /* Currently gathers and scatters are only supported for
7489 fixed-length vectors. */
7490 unsigned int count
= scatter_off_nunits
.to_constant ();
7491 vec_perm_builder
sel (count
, count
, 1);
7492 for (i
= 0; i
< (unsigned int) count
; ++i
)
7493 sel
.quick_push (i
| (count
/ 2));
7495 vec_perm_indices
indices (sel
, 1, count
);
7496 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7498 gcc_assert (perm_mask
!= NULL_TREE
);
7500 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7504 /* Currently gathers and scatters are only supported for
7505 fixed-length vectors. */
7506 unsigned int count
= nunits
.to_constant ();
7507 vec_perm_builder
sel (count
, count
, 1);
7508 for (i
= 0; i
< (unsigned int) count
; ++i
)
7509 sel
.quick_push (i
| (count
/ 2));
7511 vec_perm_indices
indices (sel
, 2, count
);
7512 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7513 gcc_assert (perm_mask
!= NULL_TREE
);
7517 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7522 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7523 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7524 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7525 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7526 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7527 scaletype
= TREE_VALUE (arglist
);
7529 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7530 && TREE_CODE (rettype
) == VOID_TYPE
);
7532 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7533 if (!is_gimple_min_invariant (ptr
))
7535 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7536 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7537 gcc_assert (!new_bb
);
7540 if (mask
== NULL_TREE
)
7542 mask_arg
= build_int_cst (masktype
, -1);
7543 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7544 mask_arg
, masktype
, NULL
);
7547 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7549 auto_vec
<tree
> vec_oprnds0
;
7550 auto_vec
<tree
> vec_oprnds1
;
7551 auto_vec
<tree
> vec_masks
;
7554 tree mask_vectype
= truth_type_for (vectype
);
7555 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7557 ? ncopies
/ 2 : ncopies
,
7558 mask
, &vec_masks
, mask_vectype
);
7560 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7562 ? ncopies
/ 2 : ncopies
,
7563 gs_info
.offset
, &vec_oprnds0
);
7564 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7566 ? ncopies
/ 2 : ncopies
,
7568 for (j
= 0; j
< ncopies
; ++j
)
7570 if (modifier
== WIDEN
)
7573 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7574 perm_mask
, stmt_info
, gsi
);
7576 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7577 src
= vec_oprnd1
= vec_oprnds1
[j
];
7579 mask_op
= vec_mask
= vec_masks
[j
];
7581 else if (modifier
== NARROW
)
7584 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7585 perm_mask
, stmt_info
, gsi
);
7587 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7588 op
= vec_oprnd0
= vec_oprnds0
[j
];
7590 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7594 op
= vec_oprnd0
= vec_oprnds0
[j
];
7595 src
= vec_oprnd1
= vec_oprnds1
[j
];
7597 mask_op
= vec_mask
= vec_masks
[j
];
7600 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7602 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7603 TYPE_VECTOR_SUBPARTS (srctype
)));
7604 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7605 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7607 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7608 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7612 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7614 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7615 TYPE_VECTOR_SUBPARTS (idxtype
)));
7616 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7617 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7619 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7620 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7628 if (modifier
== NARROW
)
7630 var
= vect_get_new_ssa_name (mask_halfvectype
,
7633 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7634 : VEC_UNPACK_LO_EXPR
,
7636 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7639 tree optype
= TREE_TYPE (mask_arg
);
7640 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7643 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7644 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7645 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7647 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7648 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7650 if (!useless_type_conversion_p (masktype
, utype
))
7652 gcc_assert (TYPE_PRECISION (utype
)
7653 <= TYPE_PRECISION (masktype
));
7654 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7655 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7656 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7662 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7663 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7665 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7667 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7670 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7671 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7673 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7674 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7679 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
	  /* We vectorize all the stmts of the interleaving group when we
	     reach the last stmt in the group.  */
7683 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7684 < DR_GROUP_SIZE (first_stmt_info
)
7693 grouped_store
= false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     SLP group.  */
7696 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7697 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7698 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7699 == first_stmt_info
);
7700 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7701 op
= vect_get_store_rhs (first_stmt_info
);
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
7706 vec_num
= group_size
;
7708 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7711 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7713 if (dump_enabled_p ())
7714 dump_printf_loc (MSG_NOTE
, vect_location
,
7715 "transform store. ncopies = %d\n", ncopies
);
7717 if (memory_access_type
== VMAT_ELEMENTWISE
7718 || memory_access_type
== VMAT_STRIDED_SLP
)
7720 gimple_stmt_iterator incr_gsi
;
7726 tree stride_base
, stride_step
, alias_off
;
7730 /* Checked by get_load_store_type. */
7731 unsigned int const_nunits
= nunits
.to_constant ();
7733 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7734 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7736 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7738 = fold_build_pointer_plus
7739 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7740 size_binop (PLUS_EXPR
,
7741 convert_to_ptrofftype (dr_offset
),
7742 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7743 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
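      /* Purely illustrative sketch (the vectype and stride below are
	 hypothetical, not taken from any testcase): with a V4SI vectype
	 and stride 3 the scalar loop

	   for (i = 0; i < n; i += 3)
	     array[i] = x;

	 would be rewritten roughly as

	   for (j = 0; ; j += 4 * 3)
	     {
	       vectemp = <x0, x1, x2, x3>;
	       array[j]     = vectemp[0];
	       array[j + 3] = vectemp[1];
	       array[j + 6] = vectemp[2];
	       array[j + 9] = vectemp[3];
	     }

	 i.e. NSTORES element stores per copy, with RUNNING_OFF advanced by
	 STRIDE_STEP between them.  */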
7763 unsigned nstores
= const_nunits
;
7765 tree ltype
= elem_type
;
7766 tree lvectype
= vectype
;
7769 if (group_size
< const_nunits
7770 && const_nunits
% group_size
== 0)
7772 nstores
= const_nunits
/ group_size
;
7774 ltype
= build_vector_type (elem_type
, group_size
);
	      /* First check if vec_extract optab doesn't support extraction
		 of vector elts directly.  */
7779 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7781 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7782 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7783 group_size
).exists (&vmode
)
7784 || (convert_optab_handler (vec_extract_optab
,
7785 TYPE_MODE (vectype
), vmode
)
7786 == CODE_FOR_nothing
))
		  /* Try to avoid emitting an extract of vector elements
		     by performing the extracts using an integer type of the
		     same size, extracting from a vector of those and then
		     re-interpreting it as the original vector type if
		     supported.  */
		  unsigned lsize
		    = group_size * GET_MODE_BITSIZE (elmode);
7795 unsigned int lnunits
= const_nunits
/ group_size
;
		  /* If we can't construct such a vector fall back to
		     element extracts from the original vector type and
		     element size stores.  */
7799 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7800 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7801 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7802 lnunits
).exists (&vmode
)
7803 && (convert_optab_handler (vec_extract_optab
,
7805 != CODE_FOR_nothing
))
7809 ltype
= build_nonstandard_integer_type (lsize
, 1);
7810 lvectype
= build_vector_type (ltype
, nstores
);
		  /* Else fall back to vector extraction anyway.
		     Fewer stores are more important than avoiding spilling
		     of the vector we extract from.  Compared to the
		     construction case in vectorizable_load no store-forwarding
		     issue exists here for reasonable archs.  */
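		  /* For example (illustration only, assuming such a target):
		     storing groups of two floats out of a V8SF vector can be
		     done as four DImode extracts by viewing the V8SF as V4DI,
		     i.e. LSIZE = 2 * 32, LTYPE = a 64-bit integer type,
		     LVECTYPE = V4DI and NSTORES = 4, provided the target
		     supports DImode extraction from V4DI.  */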
7819 else if (group_size
>= const_nunits
7820 && group_size
% const_nunits
== 0)
7823 lnel
= const_nunits
;
7827 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7828 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7831 ivstep
= stride_step
;
7832 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7833 build_int_cst (TREE_TYPE (ivstep
), vf
));
7835 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7837 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7838 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7839 create_iv (stride_base
, ivstep
, NULL
,
7840 loop
, &incr_gsi
, insert_after
,
7842 incr
= gsi_stmt (incr_gsi
);
7844 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7846 alias_off
= build_int_cst (ref_type
, 0);
7847 stmt_vec_info next_stmt_info
= first_stmt_info
;
7848 for (g
= 0; g
< group_size
; g
++)
7850 running_off
= offvar
;
7853 tree size
= TYPE_SIZE_UNIT (ltype
);
7854 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7856 tree newoff
= copy_ssa_name (running_off
, NULL
);
7857 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7859 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7860 running_off
= newoff
;
7863 op
= vect_get_store_rhs (next_stmt_info
);
7864 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7866 unsigned int group_el
= 0;
7867 unsigned HOST_WIDE_INT
7868 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7869 for (j
= 0; j
< ncopies
; j
++)
7871 vec_oprnd
= vec_oprnds
[j
];
7872 /* Pun the vector to extract from if necessary. */
7873 if (lvectype
!= vectype
)
7875 tree tem
= make_ssa_name (lvectype
);
7877 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7878 lvectype
, vec_oprnd
));
7879 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7882 for (i
= 0; i
< nstores
; i
++)
7884 tree newref
, newoff
;
7885 gimple
*incr
, *assign
;
7886 tree size
= TYPE_SIZE (ltype
);
7887 /* Extract the i'th component. */
7888 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7889 bitsize_int (i
), size
);
7890 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7893 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7897 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7899 newref
= build2 (MEM_REF
, ltype
,
7900 running_off
, this_off
);
7901 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7903 /* And store it to *running_off. */
7904 assign
= gimple_build_assign (newref
, elem
);
7905 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7909 || group_el
== group_size
)
7911 newoff
= copy_ssa_name (running_off
, NULL
);
7912 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7913 running_off
, stride_step
);
7914 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7916 running_off
= newoff
;
7919 if (g
== group_size
- 1
7922 if (j
== 0 && i
== 0)
7924 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7928 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7929 vec_oprnds
.release ();
7937 auto_vec
<tree
> dr_chain (group_size
);
7938 oprnds
.create (group_size
);
7940 gcc_assert (alignment_support_scheme
);
7941 vec_loop_masks
*loop_masks
7942 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7943 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7945 vec_loop_lens
*loop_lens
7946 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
7947 ? &LOOP_VINFO_LENS (loop_vinfo
)
7950 /* Shouldn't go with length-based approach if fully masked. */
7951 gcc_assert (!loop_lens
|| !loop_masks
);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
7956 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7959 || alignment_support_scheme
== dr_aligned
7960 || alignment_support_scheme
== dr_unaligned_supported
);
7962 tree offset
= NULL_TREE
;
7963 if (!known_eq (poffset
, 0))
7964 offset
= size_int (poffset
);
7967 tree vec_offset
= NULL_TREE
;
7968 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7970 aggr_type
= NULL_TREE
;
7973 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7975 aggr_type
= elem_type
;
7976 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7977 &bump
, &vec_offset
);
7981 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7982 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7984 aggr_type
= vectype
;
7985 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
7986 memory_access_type
);
7990 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
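  /* A smaller, purely illustrative instance: with GROUP_SIZE == 2 and V4SI
     vectors, where vx0 = {a0, a1, a2, a3} and vx1 = {b0, b1, b2, b3}, the
     permutations interleave the lanes as

	vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 4, 1, 5 } >
	vx6 = VEC_PERM_EXPR < vx0, vx1, { 2, 6, 3, 7 } >

     so that vx5 and vx6 can be stored back-to-back to produce the
     interleaved memory layout {a0, b0, a1, b1, a2, b2, a3, b3}.  */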
8030 auto_vec
<tree
> vec_masks
;
8031 tree vec_mask
= NULL
;
8032 auto_vec
<tree
> vec_offsets
;
8033 auto_vec
<vec
<tree
> > gvec_oprnds
;
8034 gvec_oprnds
.safe_grow_cleared (group_size
, true);
8035 for (j
= 0; j
< ncopies
; j
++)
8042 /* Get vectorized arguments for SLP_NODE. */
8043 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
8045 vec_oprnd
= vec_oprnds
[0];
	  /* For interleaved stores we collect vectorized defs for all the
	     stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
	     used as an input to vect_permute_store_chain().

	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
	     and OPRNDS are of size 1.  */
8055 stmt_vec_info next_stmt_info
= first_stmt_info
;
8056 for (i
= 0; i
< group_size
; i
++)
	      /* Since gaps are not supported for interleaved stores,
		 DR_GROUP_SIZE is the exact number of stmts in the chain.
		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  In case
		 that there is no interleaving, DR_GROUP_SIZE is 1,
		 and only one iteration of the loop will be executed.  */
8063 op
= vect_get_store_rhs (next_stmt_info
);
8064 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8065 ncopies
, op
, &gvec_oprnds
[i
]);
8066 vec_oprnd
= gvec_oprnds
[i
][0];
8067 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
8068 oprnds
.quick_push (gvec_oprnds
[i
][0]);
8069 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8073 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8074 mask
, &vec_masks
, mask_vectype
);
8075 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
8080 gcc_assert (useless_type_conversion_p (vectype
,
8081 TREE_TYPE (vec_oprnd
)));
8082 bool simd_lane_access_p
8083 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8084 if (simd_lane_access_p
8086 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8087 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8088 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
8089 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8090 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8091 get_alias_set (TREE_TYPE (ref_type
))))
8093 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8094 dataref_offset
= build_int_cst (ref_type
, 0);
8096 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8098 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
8099 &gs_info
, &dataref_ptr
,
8101 vec_offset
= vec_offsets
[0];
8105 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
8106 simd_lane_access_p
? loop
: NULL
,
8107 offset
, &dummy
, gsi
, &ptr_incr
,
8108 simd_lane_access_p
, NULL_TREE
, bump
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain().
	     If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
8117 for (i
= 0; i
< group_size
; i
++)
8119 vec_oprnd
= gvec_oprnds
[i
][j
];
8120 dr_chain
[i
] = gvec_oprnds
[i
][j
];
8121 oprnds
[i
] = gvec_oprnds
[i
][j
];
8124 vec_mask
= vec_masks
[j
];
8127 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
8128 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8129 vec_offset
= vec_offsets
[j
];
8131 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8135 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8139 /* Get an array into which we can store the individual vectors. */
8140 vec_array
= create_vector_array (vectype
, vec_num
);
	  /* Invalidate the current contents of VEC_ARRAY.  This should
	     become an RTL clobber too, which prevents the vector registers
	     from being upward-exposed.  */
8145 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8147 /* Store the individual vectors into the array. */
8148 for (i
= 0; i
< vec_num
; i
++)
8150 vec_oprnd
= dr_chain
[i
];
8151 write_vector_array (vinfo
, stmt_info
,
8152 gsi
, vec_oprnd
, vec_array
, i
);
8155 tree final_mask
= NULL
;
8157 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8160 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
	      /* Emit:
		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
				     VEC_ARRAY).  */
8169 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8170 tree alias_ptr
= build_int_cst (ref_type
, align
);
8171 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8172 dataref_ptr
, alias_ptr
,
8173 final_mask
, vec_array
);
	      /* Emit:
		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8179 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8180 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
8182 gimple_call_set_lhs (call
, data_ref
);
8184 gimple_call_set_nothrow (call
, true);
8185 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8188 /* Record that VEC_ARRAY is now dead. */
8189 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8197 result_chain
.create (group_size
);
8199 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
8200 gsi
, &result_chain
);
8203 stmt_vec_info next_stmt_info
= first_stmt_info
;
8204 for (i
= 0; i
< vec_num
; i
++)
8207 unsigned HOST_WIDE_INT align
;
8209 tree final_mask
= NULL_TREE
;
8211 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8213 vectype
, vec_num
* j
+ i
);
8215 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8218 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8220 tree scale
= size_int (gs_info
.scale
);
8223 call
= gimple_build_call_internal
8224 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
8225 scale
, vec_oprnd
, final_mask
);
8227 call
= gimple_build_call_internal
8228 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
8230 gimple_call_set_nothrow (call
, true);
8231 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8237 /* Bump the vector pointer. */
8238 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
8239 gsi
, stmt_info
, bump
);
8242 vec_oprnd
= vec_oprnds
[i
];
8243 else if (grouped_store
)
	  /* For grouped stores vectorized defs are interleaved in
	     vect_permute_store_chain().  */
8246 vec_oprnd
= result_chain
[i
];
8248 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
8249 if (alignment_support_scheme
== dr_aligned
)
8251 gcc_assert (aligned_access_p (first_dr_info
, vectype
));
8254 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
8256 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
8260 misalign
= misalignment
;
8261 if (dataref_offset
== NULL_TREE
8262 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8263 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
8265 align
= least_bit_hwi (misalign
| align
);
8267 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8269 tree perm_mask
= perm_mask_for_reverse (vectype
);
8270 tree perm_dest
= vect_create_destination_var
8271 (vect_get_store_rhs (stmt_info
), vectype
);
8272 tree new_temp
= make_ssa_name (perm_dest
);
8274 /* Generate the permute statement. */
8276 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
8277 vec_oprnd
, perm_mask
);
8278 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8280 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8281 vec_oprnd
= new_temp
;
8284 /* Arguments are ready. Create the new vector stmt. */
8287 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8289 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
8291 final_mask
, vec_oprnd
);
8292 gimple_call_set_nothrow (call
, true);
8293 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8299 = vect_get_loop_len (loop_vinfo
, loop_lens
,
8300 vec_num
* ncopies
, vec_num
* j
+ i
);
8301 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
8302 machine_mode vmode
= TYPE_MODE (vectype
);
8303 opt_machine_mode new_ovmode
8304 = get_len_load_store_mode (vmode
, false);
8305 machine_mode new_vmode
= new_ovmode
.require ();
8306 /* Need conversion if it's wrapped with VnQI. */
8307 if (vmode
!= new_vmode
)
8310 = build_vector_type_for_mode (unsigned_intQI_type_node
,
8313 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
8315 = build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
8317 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
8319 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
8324 = gimple_build_call_internal (IFN_LEN_STORE
, 4, dataref_ptr
,
8325 ptr
, final_len
, vec_oprnd
);
8326 gimple_call_set_nothrow (call
, true);
8327 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8332 data_ref
= fold_build2 (MEM_REF
, vectype
,
8336 : build_int_cst (ref_type
, 0));
8337 if (alignment_support_scheme
== dr_aligned
)
8338 gcc_assert (aligned_access_p (first_dr_info
, vectype
));
8340 TREE_TYPE (data_ref
)
8341 = build_aligned_type (TREE_TYPE (data_ref
),
8342 align
* BITS_PER_UNIT
);
8343 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8344 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8345 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8351 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8352 if (!next_stmt_info
)
8359 *vec_stmt
= new_stmt
;
8360 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8364 for (i
= 0; i
< group_size
; ++i
)
8366 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8370 result_chain
.release ();
8371 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
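/* A typical use (a sketch only, mirroring perm_mask_for_reverse elsewhere in
   this file) builds the indices with vec_perm_builder, wraps them in
   vec_perm_indices, verifies them with can_vec_perm_const_p and only then
   asks for the VECTOR_CST mask:

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */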
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT_INFO can then be moved),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
  bool nested_in_vect_loop = false;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j;
  unsigned int group_size;
  poly_uint64 group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  stmt_vec_info first_stmt_info;
  stmt_vec_info first_stmt_info_for_drptr = NULL;
  bool compute_in_loop = false;
  class loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  poly_uint64 vf;
  tree aggr_type;
  gather_scatter_info gs_info;
  tree ref_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
	return false;

      tree_code code = gimple_assign_rhs_code (assign);
      if (code != ARRAY_REF
	  && code != BIT_FIELD_REF
	  && code != INDIRECT_REF
	  && code != COMPONENT_REF
	  && code != IMAGPART_EXPR
	  && code != REALPART_EXPR
	  && code != MEM_REF
	  && TREE_CODE_CLASS (code) != tcc_declaration)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
	return false;

      scalar_dest = gimple_call_lhs (call);
      if (!scalar_dest)
	return false;

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0
	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
				      /* ??? For SLP we only have operands for
					 the mask operand.  */
				      slp_node ? 0 : mask_index,
				      &mask, NULL, &mask_dt, &mask_vectype))
	return false;
    }
8590 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8591 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8595 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8596 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8597 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
8608 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8610 gcc_assert (ncopies
>= 1);
8612 /* FORNOW. This restriction should be relaxed. */
8613 if (nested_in_vect_loop
&& ncopies
> 1)
8615 if (dump_enabled_p ())
8616 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8617 "multiple types in nested loop.\n");
  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
8624 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8625 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8626 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8628 if (dump_enabled_p ())
8629 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8630 "cannot perform implicit CSE when unrolling "
8631 "with negative dependence distance\n");
8635 elem_type
= TREE_TYPE (vectype
);
8636 mode
= TYPE_MODE (vectype
);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g., data copies).  */
8640 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8642 if (dump_enabled_p ())
8643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8644 "Aligned load, but unsupported type.\n");
8648 /* Check if the load is a part of an interleaving chain. */
8649 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8651 grouped_load
= true;
8653 gcc_assert (!nested_in_vect_loop
);
8654 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8656 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8657 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8659 /* Refuse non-SLP vectorization of SLP-only groups. */
8660 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8662 if (dump_enabled_p ())
8663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8664 "cannot vectorize load in non-SLP mode.\n");
8668 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
	  /* In BB vectorization we may not actually use a loaded vector
	     accessing elements in excess of DR_GROUP_SIZE.  */
8676 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8677 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8678 unsigned HOST_WIDE_INT nunits
;
8679 unsigned j
, k
, maxk
= 0;
8680 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8683 tree vectype
= SLP_TREE_VECTYPE (slp_node
);
8684 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8685 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8687 if (dump_enabled_p ())
8688 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8689 "BB vectorization with gaps at the end of "
8690 "a load is not supported\n");
8697 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8700 if (dump_enabled_p ())
8701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8703 "unsupported load permutation\n");
      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
8710 if (!PURE_SLP_STMT (stmt_info
)
8711 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8712 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8713 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8715 if (dump_enabled_p ())
8716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8717 "cannot perform implicit CSE when performing "
8718 "group loads with negative dependence distance\n");
8725 vect_memory_access_type memory_access_type
;
8726 enum dr_alignment_support alignment_support_scheme
;
8729 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, VLS_LOAD
,
8730 ncopies
, &memory_access_type
, &poffset
,
8731 &alignment_support_scheme
, &misalignment
, &gs_info
))
8736 if (memory_access_type
== VMAT_CONTIGUOUS
)
8738 machine_mode vec_mode
= TYPE_MODE (vectype
);
8739 if (!VECTOR_MODE_P (vec_mode
)
8740 || !can_vec_mask_load_store_p (vec_mode
,
8741 TYPE_MODE (mask_vectype
), true))
8744 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8745 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8747 if (dump_enabled_p ())
8748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8749 "unsupported access type for masked load.\n");
8752 else if (memory_access_type
== VMAT_GATHER_SCATTER
8753 && gs_info
.ifn
== IFN_LAST
8756 if (dump_enabled_p ())
8757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8758 "unsupported masked emulated gather.\n");
8763 if (!vec_stmt
) /* transformation not required. */
8767 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
8770 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8772 "incompatible vector types for invariants\n");
8777 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8780 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8781 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, VLS_LOAD
,
8782 group_size
, memory_access_type
,
8785 if (dump_enabled_p ()
8786 && memory_access_type
!= VMAT_ELEMENTWISE
8787 && memory_access_type
!= VMAT_GATHER_SCATTER
8788 && alignment_support_scheme
!= dr_aligned
)
8789 dump_printf_loc (MSG_NOTE
, vect_location
,
8790 "Vectorizing an unaligned access.\n");
8792 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
8793 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8794 alignment_support_scheme
, misalignment
,
8795 &gs_info
, slp_node
, cost_vec
);
8800 gcc_assert (memory_access_type
8801 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8803 if (dump_enabled_p ())
8804 dump_printf_loc (MSG_NOTE
, vect_location
,
8805 "transform load. ncopies = %d\n", ncopies
);
8809 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8810 ensure_base_align (dr_info
);
8812 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8814 vect_build_gather_load_calls (vinfo
,
8815 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8819 if (memory_access_type
== VMAT_INVARIANT
)
8821 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.  */
8826 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8827 && !nested_in_vect_loop
8828 && hoist_defs_of_uses (stmt_info
, loop
));
8831 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8832 if (dump_enabled_p ())
8833 dump_printf_loc (MSG_NOTE
, vect_location
,
8834 "hoisting out of the vectorized loop: %G", stmt
);
8835 scalar_dest
= copy_ssa_name (scalar_dest
);
8836 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8837 gsi_insert_on_edge_immediate
8838 (loop_preheader_edge (loop
),
8839 gimple_build_assign (scalar_dest
, rhs
));
      /* These copies are all equivalent, but currently the representation
	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
8843 gimple_stmt_iterator gsi2
= *gsi
;
8845 for (j
= 0; j
< ncopies
; j
++)
8848 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8851 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8853 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8855 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8859 *vec_stmt
= new_stmt
;
8860 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8866 if (memory_access_type
== VMAT_ELEMENTWISE
8867 || memory_access_type
== VMAT_STRIDED_SLP
)
8869 gimple_stmt_iterator incr_gsi
;
8874 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8875 tree stride_base
, stride_step
, alias_off
;
8876 /* Checked by get_load_store_type. */
8877 unsigned int const_nunits
= nunits
.to_constant ();
8878 unsigned HOST_WIDE_INT cst_offset
= 0;
8881 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
8882 gcc_assert (!nested_in_vect_loop
);
8886 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8887 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8891 first_stmt_info
= stmt_info
;
8892 first_dr_info
= dr_info
;
8894 if (slp
&& grouped_load
)
8896 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8897 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8903 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8904 * vect_get_place_in_interleaving_chain (stmt_info
,
8907 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8910 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8912 = fold_build_pointer_plus
8913 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8914 size_binop (PLUS_EXPR
,
8915 convert_to_ptrofftype (dr_offset
),
8916 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8917 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
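      /* A purely illustrative instance (hypothetical vectype and stride):
	 with V4SI and stride 3 the loop

	   for (i = 0; i < n; i += 3)
	     ... = array[i];

	 becomes roughly

	   for (j = 0; ; j += 4 * 3)
	     {
	       tmp0 = array[j];
	       tmp1 = array[j + 3];
	       tmp2 = array[j + 6];
	       tmp3 = array[j + 9];
	       vectemp = {tmp0, tmp1, tmp2, tmp3};
	     }

	 i.e. NLOADS element loads per copy feeding one CONSTRUCTOR.  */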
8935 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8936 build_int_cst (TREE_TYPE (stride_step
), vf
));
8938 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8940 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8941 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8942 create_iv (stride_base
, ivstep
, NULL
,
8943 loop
, &incr_gsi
, insert_after
,
8946 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8948 running_off
= offvar
;
8949 alias_off
= build_int_cst (ref_type
, 0);
8950 int nloads
= const_nunits
;
8952 tree ltype
= TREE_TYPE (vectype
);
8953 tree lvectype
= vectype
;
8954 auto_vec
<tree
> dr_chain
;
8955 if (memory_access_type
== VMAT_STRIDED_SLP
)
8957 if (group_size
< const_nunits
)
	      /* First check if vec_init optab supports construction from vector
		 elts directly.  Otherwise avoid emitting a constructor of
		 vector elements by performing the loads using an integer type
		 of the same size, constructing a vector of those and then
		 re-interpreting it as the original vector type.  This avoids a
		 huge runtime penalty due to the general inability to perform
		 store forwarding from smaller stores to a larger load.  */
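	      /* E.g. (illustration only, assuming a suitable target): a group
		 of two floats loaded into a V8SF vector can instead be built
		 from four 64-bit integer loads composed into a V4DI and then
		 view-converted to V8SF, which keeps each load as wide as the
		 scalar stores that produced the data and so avoids a
		 store-forwarding stall.  */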
8968 = vector_vector_composition_type (vectype
,
8969 const_nunits
/ group_size
,
8971 if (vtype
!= NULL_TREE
)
8973 nloads
= const_nunits
/ group_size
;
8982 lnel
= const_nunits
;
8985 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8987 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8988 else if (nloads
== 1)
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF.  */
9000 unsigned int const_vf
= vf
.to_constant ();
9001 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
9002 dr_chain
.create (ncopies
);
9005 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9007 unsigned int group_el
= 0;
9008 unsigned HOST_WIDE_INT
9009 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
9010 for (j
= 0; j
< ncopies
; j
++)
9013 vec_alloc (v
, nloads
);
9014 gimple
*new_stmt
= NULL
;
9015 for (i
= 0; i
< nloads
; i
++)
9017 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
9018 group_el
* elsz
+ cst_offset
);
9019 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
9020 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9021 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
9022 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9024 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9025 gimple_assign_lhs (new_stmt
));
9029 || group_el
== group_size
)
9031 tree newoff
= copy_ssa_name (running_off
);
9032 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
9033 running_off
, stride_step
);
9034 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
9036 running_off
= newoff
;
9042 tree vec_inv
= build_constructor (lvectype
, v
);
9043 new_temp
= vect_init_vector (vinfo
, stmt_info
,
9044 vec_inv
, lvectype
, gsi
);
9045 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9046 if (lvectype
!= vectype
)
9048 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
9050 build1 (VIEW_CONVERT_EXPR
,
9051 vectype
, new_temp
));
9052 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9059 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
9061 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9066 *vec_stmt
= new_stmt
;
9067 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9073 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
9079 if (memory_access_type
== VMAT_GATHER_SCATTER
9080 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
9081 grouped_load
= false;
9085 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9086 group_size
= DR_GROUP_SIZE (first_stmt_info
);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
9089 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
9090 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
      /* For BB vectorization always use the first stmt to base
	 the data ref pointer on.  */
9094 first_stmt_info_for_drptr
9095 = vect_find_first_scalar_stmt_in_slp (slp_node
);
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
9109 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9112 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
9115 /* VEC_NUM is the number of vect stmts to be created for this group. */
9118 grouped_load
= false;
	  /* If an SLP permutation is from N elements to N elements,
	     and if one vector holds a whole number of N, we can load
	     the inputs to the permutation in the same way as an
	     unpermuted sequence.  In other cases we need to load the
	     whole group, not only the number of vector stmts the
	     permutation result fits in.  */
9125 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
9127 && (group_size
!= scalar_lanes
9128 || !multiple_p (nunits
, group_size
)))
	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF; see vect_transform_slp_perm_load.  */
9132 unsigned int const_vf
= vf
.to_constant ();
9133 unsigned int const_nunits
= nunits
.to_constant ();
9134 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
9135 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
9139 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9141 = group_size
- scalar_lanes
;
9145 vec_num
= group_size
;
9147 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
9151 first_stmt_info
= stmt_info
;
9152 first_dr_info
= dr_info
;
9153 group_size
= vec_num
= 1;
9155 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
9158 gcc_assert (alignment_support_scheme
);
9159 vec_loop_masks
*loop_masks
9160 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
9161 ? &LOOP_VINFO_MASKS (loop_vinfo
)
9163 vec_loop_lens
*loop_lens
9164 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
9165 ? &LOOP_VINFO_LENS (loop_vinfo
)
9168 /* Shouldn't go with length-based approach if fully masked. */
9169 gcc_assert (!loop_lens
|| !loop_masks
);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
9174 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
9177 || alignment_support_scheme
== dr_aligned
9178 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -
  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
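  /* A smaller, purely illustrative instance: for GROUP_SIZE == 2 and V4SI
     vectors, where vx0 = {a0, b0, a1, b1} and vx1 = {a2, b2, a3, b3}, the
     extract-even/extract-odd permutations are

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >

     giving one vector per interleaved scalar stream.  */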
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

         p = initial_addr;
         indx = 0;
         loop {
	   p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

         msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
         indx = 0;
         loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */
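  /* Concretely (a sketch only, not tied to a particular target): with
     16-byte vectors and p1 = base + 4, the loop body above loads the two
     aligned vectors surrounding the wanted data and merges them:

         msq = *(base);           // floor-aligned load, done once as msq_init
         lsq = *(base + 16);      // aligned load inside the loop
         vec = realign_load (msq, lsq, realignment_token);  // shift by 4 bytes

     with msq reused as the previous iteration's lsq on the next round.  */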
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
9280 if (nested_in_vect_loop
9281 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
9282 GET_MODE_SIZE (TYPE_MODE (vectype
))))
9284 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
9285 compute_in_loop
= true;
9288 bool diff_first_stmt_info
9289 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
9291 if ((alignment_support_scheme
== dr_explicit_realign_optimized
9292 || alignment_support_scheme
== dr_explicit_realign
)
9293 && !compute_in_loop
)
      /* If we have different first_stmt_info, we can't set up realignment
	 here, since we can't guarantee first_stmt_info DR has been
	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
	 distance from first_stmt_info DR instead as below.  */
9299 if (!diff_first_stmt_info
)
9300 msq
= vect_setup_realignment (vinfo
,
9301 first_stmt_info
, gsi
, &realignment_token
,
9302 alignment_support_scheme
, NULL_TREE
,
9304 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9306 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
9307 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
9309 gcc_assert (!first_stmt_info_for_drptr
);
9315 tree offset
= NULL_TREE
;
9316 if (!known_eq (poffset
, 0))
9317 offset
= size_int (poffset
);
9318 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9319 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
9322 tree vec_offset
= NULL_TREE
;
9323 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9325 aggr_type
= NULL_TREE
;
9328 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9330 aggr_type
= elem_type
;
9331 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
9332 &bump
, &vec_offset
);
9336 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9337 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
9339 aggr_type
= vectype
;
9340 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
9341 memory_access_type
);
9344 vec
<tree
> vec_offsets
= vNULL
;
9345 auto_vec
<tree
> vec_masks
;
9347 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9348 mask
, &vec_masks
, mask_vectype
, NULL_TREE
);
9349 tree vec_mask
= NULL_TREE
;
9350 poly_uint64 group_elt
= 0;
9351 for (j
= 0; j
< ncopies
; j
++)
9353 /* 1. Create the vector or array pointer update chain. */
9356 bool simd_lane_access_p
9357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9358 if (simd_lane_access_p
9359 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9361 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9362 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9363 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9364 get_alias_set (TREE_TYPE (ref_type
)))
9365 && (alignment_support_scheme
== dr_aligned
9366 || alignment_support_scheme
== dr_unaligned_supported
))
9368 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9369 dataref_offset
= build_int_cst (ref_type
, 0);
9371 else if (diff_first_stmt_info
)
9374 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9375 aggr_type
, at_loop
, offset
, &dummy
,
9376 gsi
, &ptr_incr
, simd_lane_access_p
,
9378 /* Adjust the pointer by the difference to first_stmt. */
9379 data_reference_p ptrdr
9380 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9382 = fold_convert (sizetype
,
9383 size_binop (MINUS_EXPR
,
9384 DR_INIT (first_dr_info
->dr
),
9386 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9388 if (alignment_support_scheme
== dr_explicit_realign
)
9390 msq
= vect_setup_realignment (vinfo
,
9391 first_stmt_info_for_drptr
, gsi
,
9393 alignment_support_scheme
,
9394 dataref_ptr
, &at_loop
);
9395 gcc_assert (!compute_in_loop
);
9398 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9400 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9401 &gs_info
, &dataref_ptr
,
9406 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9408 offset
, &dummy
, gsi
, &ptr_incr
,
9412 vec_mask
= vec_masks
[0];
9417 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9419 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9420 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9423 vec_mask
= vec_masks
[j
];
9426 if (grouped_load
|| slp_perm
)
9427 dr_chain
.create (vec_num
);
9429 gimple
*new_stmt
= NULL
;
9430 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9434 vec_array
= create_vector_array (vectype
, vec_num
);
9436 tree final_mask
= NULL_TREE
;
9438 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9441 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
	      /* Emit:
		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
						VEC_MASK).  */
9450 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
9451 tree alias_ptr
= build_int_cst (ref_type
, align
);
9452 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9453 dataref_ptr
, alias_ptr
,
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9460 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9461 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9463 gimple_call_set_lhs (call
, vec_array
);
9464 gimple_call_set_nothrow (call
, true);
9465 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9468 /* Extract each vector into an SSA_NAME. */
9469 for (i
= 0; i
< vec_num
; i
++)
9471 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9473 dr_chain
.quick_push (new_temp
);
9476 /* Record the mapping between SSA_NAMEs and statements. */
9477 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9479 /* Record that VEC_ARRAY is now dead. */
9480 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9484 for (i
= 0; i
< vec_num
; i
++)
9486 tree final_mask
= NULL_TREE
;
9488 && memory_access_type
!= VMAT_INVARIANT
)
9489 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9491 vectype
, vec_num
* j
+ i
);
9493 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9497 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9498 gsi
, stmt_info
, bump
);
9500 /* 2. Create the vector-load in the loop. */
9501 switch (alignment_support_scheme
)
9504 case dr_unaligned_supported
:
9506 unsigned int misalign
;
9507 unsigned HOST_WIDE_INT align
;
9509 if (memory_access_type
== VMAT_GATHER_SCATTER
9510 && gs_info
.ifn
!= IFN_LAST
)
9512 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9513 vec_offset
= vec_offsets
[j
];
9514 tree zero
= build_zero_cst (vectype
);
9515 tree scale
= size_int (gs_info
.scale
);
9518 call
= gimple_build_call_internal
9519 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9520 vec_offset
, scale
, zero
, final_mask
);
9522 call
= gimple_build_call_internal
9523 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9524 vec_offset
, scale
, zero
);
9525 gimple_call_set_nothrow (call
, true);
9527 data_ref
= NULL_TREE
;
9530 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
9532 /* Emulated gather-scatter. */
9533 gcc_assert (!final_mask
);
9534 unsigned HOST_WIDE_INT const_nunits
9535 = nunits
.to_constant ();
9536 unsigned HOST_WIDE_INT const_offset_nunits
9537 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
9539 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9540 vec_alloc (ctor_elts
, const_nunits
);
9541 gimple_seq stmts
= NULL
;
		  /* We support offset vectors with more elements
		     than the data vector for now.  */
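		  /* For example (illustration only): a V4DI data vector with
		     a V8SI offset vector gives FACTOR == 2; copies 0 and 1
		     both read VEC_OFFSETS[0], taking lanes 0-3 and 4-7
		     respectively via ELT_OFFSET.  */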
9544 unsigned HOST_WIDE_INT factor
9545 = const_offset_nunits
/ const_nunits
;
9546 vec_offset
= vec_offsets
[j
/ factor
];
9547 unsigned elt_offset
= (j
% factor
) * const_nunits
;
9548 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9549 tree scale
= size_int (gs_info
.scale
);
9551 = get_object_alignment (DR_REF (first_dr_info
->dr
));
9552 tree ltype
= build_aligned_type (TREE_TYPE (vectype
),
9554 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9556 tree boff
= size_binop (MULT_EXPR
,
9557 TYPE_SIZE (idx_type
),
9560 tree idx
= gimple_build (&stmts
, BIT_FIELD_REF
,
9561 idx_type
, vec_offset
,
9562 TYPE_SIZE (idx_type
),
9564 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9565 idx
= gimple_build (&stmts
, MULT_EXPR
,
9566 sizetype
, idx
, scale
);
9567 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
9568 TREE_TYPE (dataref_ptr
),
9570 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9571 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
9572 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
9573 build_int_cst (ref_type
, 0));
9574 new_stmt
= gimple_build_assign (elt
, ref
);
9575 gimple_seq_add_stmt (&stmts
, new_stmt
);
9576 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
9578 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9579 new_stmt
= gimple_build_assign (NULL_TREE
,
9581 (vectype
, ctor_elts
));
9582 data_ref
= NULL_TREE
;
9587 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9588 if (alignment_support_scheme
== dr_aligned
)
9590 gcc_assert (aligned_access_p (first_dr_info
, vectype
));
9593 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9595 align
= dr_alignment
9596 (vect_dr_behavior (vinfo
, first_dr_info
));
9600 misalign
= misalignment
;
9601 if (dataref_offset
== NULL_TREE
9602 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9603 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9605 align
= least_bit_hwi (misalign
| align
);
9609 tree ptr
= build_int_cst (ref_type
,
9610 align
* BITS_PER_UNIT
);
9612 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9615 gimple_call_set_nothrow (call
, true);
9617 data_ref
= NULL_TREE
;
9619 else if (loop_lens
&& memory_access_type
!= VMAT_INVARIANT
)
9622 = vect_get_loop_len (loop_vinfo
, loop_lens
,
9625 tree ptr
= build_int_cst (ref_type
,
9626 align
* BITS_PER_UNIT
);
9628 = gimple_build_call_internal (IFN_LEN_LOAD
, 3,
9631 gimple_call_set_nothrow (call
, true);
9633 data_ref
= NULL_TREE
;
9635 /* Need conversion if it's wrapped with VnQI. */
9636 machine_mode vmode
= TYPE_MODE (vectype
);
9637 opt_machine_mode new_ovmode
9638 = get_len_load_store_mode (vmode
, true);
9639 machine_mode new_vmode
= new_ovmode
.require ();
9640 if (vmode
!= new_vmode
)
9642 tree qi_type
= unsigned_intQI_type_node
;
9644 = build_vector_type_for_mode (qi_type
, new_vmode
);
9645 tree var
= vect_get_new_ssa_name (new_vtype
,
9647 gimple_set_lhs (call
, var
);
9648 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
9650 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
9652 = gimple_build_assign (vec_dest
,
9653 VIEW_CONVERT_EXPR
, op
);
9658 tree ltype
= vectype
;
9659 tree new_vtype
= NULL_TREE
;
9660 unsigned HOST_WIDE_INT gap
9661 = DR_GROUP_GAP (first_stmt_info
);
9662 unsigned int vect_align
9663 = vect_known_alignment_in_bytes (first_dr_info
,
9665 unsigned int scalar_dr_size
9666 = vect_get_scalar_dr_size (first_dr_info
);
		  /* If there's no peeling for gaps but we have a gap
		     with slp loads then load the lower half of the
		     vector only.  See get_group_load_store_type for
		     when we apply this optimization.  */
9673 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9675 && known_eq (nunits
, (group_size
- gap
) * 2)
9676 && known_eq (nunits
, group_size
)
9677 && gap
>= (vect_align
/ scalar_dr_size
))
9681 = vector_vector_composition_type (vectype
, 2,
9683 if (new_vtype
!= NULL_TREE
)
9687 = (dataref_offset
? dataref_offset
9688 : build_int_cst (ref_type
, 0));
9689 if (ltype
!= vectype
9690 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9692 unsigned HOST_WIDE_INT gap_offset
9693 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9694 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9695 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9698 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9699 if (alignment_support_scheme
== dr_aligned
)
9702 TREE_TYPE (data_ref
)
9703 = build_aligned_type (TREE_TYPE (data_ref
),
9704 align
* BITS_PER_UNIT
);
9705 if (ltype
!= vectype
)
9707 vect_copy_ref_info (data_ref
,
9708 DR_REF (first_dr_info
->dr
));
9709 tree tem
= make_ssa_name (ltype
);
9710 new_stmt
= gimple_build_assign (tem
, data_ref
);
9711 vect_finish_stmt_generation (vinfo
, stmt_info
,
9714 vec
<constructor_elt
, va_gc
> *v
;
9716 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9718 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9719 build_zero_cst (ltype
));
9720 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9724 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9725 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9726 build_zero_cst (ltype
));
9728 gcc_assert (new_vtype
!= NULL_TREE
);
9729 if (new_vtype
== vectype
)
9730 new_stmt
= gimple_build_assign (
9731 vec_dest
, build_constructor (vectype
, v
));
9734 tree new_vname
= make_ssa_name (new_vtype
);
9735 new_stmt
= gimple_build_assign (
9736 new_vname
, build_constructor (new_vtype
, v
));
9737 vect_finish_stmt_generation (vinfo
, stmt_info
,
9739 new_stmt
= gimple_build_assign (
9740 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9747 case dr_explicit_realign
:
9751 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9753 if (compute_in_loop
)
9754 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9756 dr_explicit_realign
,
9759 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9760 ptr
= copy_ssa_name (dataref_ptr
);
9762 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9763 // For explicit realign the target alignment should be
9764 // known at compile time.
9765 unsigned HOST_WIDE_INT align
=
9766 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9767 new_stmt
= gimple_build_assign
9768 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9770 (TREE_TYPE (dataref_ptr
),
9771 -(HOST_WIDE_INT
) align
));
9772 vect_finish_stmt_generation (vinfo
, stmt_info
,
9775 = build2 (MEM_REF
, vectype
, ptr
,
9776 build_int_cst (ref_type
, 0));
9777 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9778 vec_dest
= vect_create_destination_var (scalar_dest
,
9780 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9781 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9782 gimple_assign_set_lhs (new_stmt
, new_temp
);
9783 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9784 vect_finish_stmt_generation (vinfo
, stmt_info
,
9788 bump
= size_binop (MULT_EXPR
, vs
,
9789 TYPE_SIZE_UNIT (elem_type
));
9790 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9791 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9793 new_stmt
= gimple_build_assign
9794 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9796 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9797 ptr
= copy_ssa_name (ptr
, new_stmt
);
9798 gimple_assign_set_lhs (new_stmt
, ptr
);
9799 vect_finish_stmt_generation (vinfo
, stmt_info
,
9802 = build2 (MEM_REF
, vectype
, ptr
,
9803 build_int_cst (ref_type
, 0));
9806 case dr_explicit_realign_optimized
:
9808 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9809 new_temp
= copy_ssa_name (dataref_ptr
);
9811 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9812 // We should only be doing this if we know the target
9813 // alignment at compile time.
9814 unsigned HOST_WIDE_INT align
=
9815 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9816 new_stmt
= gimple_build_assign
9817 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9818 build_int_cst (TREE_TYPE (dataref_ptr
),
9819 -(HOST_WIDE_INT
) align
));
9820 vect_finish_stmt_generation (vinfo
, stmt_info
,
9823 = build2 (MEM_REF
, vectype
, new_temp
,
9824 build_int_cst (ref_type
, 0));
9830 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9831 /* DATA_REF is null if we've already built the statement. */
9834 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9835 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9837 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9838 gimple_set_lhs (new_stmt
, new_temp
);
9839 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
	  /* 3. Handle explicit realignment if necessary/supported.

	       vec_dest = realign_load (msq, lsq, realignment_token)  */
9844 if (alignment_support_scheme
== dr_explicit_realign_optimized
9845 || alignment_support_scheme
== dr_explicit_realign
)
9847 lsq
= gimple_assign_lhs (new_stmt
);
9848 if (!realignment_token
)
9849 realignment_token
= dataref_ptr
;
9850 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9851 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9852 msq
, lsq
, realignment_token
);
9853 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9854 gimple_assign_set_lhs (new_stmt
, new_temp
);
9855 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9857 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9860 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9861 add_phi_arg (phi
, lsq
,
9862 loop_latch_edge (containing_loop
),
9868 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9870 tree perm_mask
= perm_mask_for_reverse (vectype
);
9871 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9872 perm_mask
, stmt_info
, gsi
);
9873 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9876 /* Collect vector loads and later create their permutation in
9877 vect_transform_grouped_load (). */
9878 if (grouped_load
|| slp_perm
)
9879 dr_chain
.quick_push (new_temp
);
9881 /* Store vector loads in the corresponding SLP_NODE. */
9882 if (slp
&& !slp_perm
)
9883 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
	  /* With an SLP permutation we load the gaps as well; without one
	     we need to skip the gaps after we manage to fully load
	     all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
9888 group_elt
+= nunits
;
9889 if (maybe_ne (group_gap_adj
, 0U)
9891 && known_eq (group_elt
, group_size
- group_gap_adj
))
9893 poly_wide_int bump_val
9894 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9896 if (tree_int_cst_sgn
9897 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9898 bump_val
= -bump_val
;
9899 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9900 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9901 gsi
, stmt_info
, bump
);
9905 /* Bump the vector pointer to account for a gap or for excess
9906 elements loaded for a permuted SLP load. */
9907 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9909 poly_wide_int bump_val
9910 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9912 if (tree_int_cst_sgn
9913 (vect_dr_behavior (vinfo
, dr_info
)->step
) == -1)
9914 bump_val
= -bump_val
;
9915 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9916 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9921 if (slp
&& !slp_perm
)
9927 /* For SLP we know we've seen all possible uses of dr_chain so
9928 direct vect_transform_slp_perm_load to DCE the unused parts.
9929 ??? This is a hack to prevent compile-time issues as seen
9930 in PR101120 and friends. */
9931 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9932 gsi
, vf
, false, &n_perms
,
9940 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9941 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9943 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9947 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9950 dr_chain
.release ();
9953 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - the condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - the def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
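/* Illustrative sketch, not from the original source: for a scalar
   statement

     x_5 = a_1 < b_2 ? c_3 : d_4;

   COND is the tree "a_1 < b_2"; the checks below verify that a_1 and
   b_2 are simple uses (constants, invariants, or defs inside the
   vectorizable region) and record the vector type to compare in,
   e.g. a 4-lane integer vector for 32-bit operands.  */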
9972 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9973 slp_tree slp_node
, tree
*comp_vectype
,
9974 enum vect_def_type
*dts
, tree vectype
)
9977 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9981 if (TREE_CODE (cond
) == SSA_NAME
9982 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9984 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9985 &slp_op
, &dts
[0], comp_vectype
)
9987 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9992 if (!COMPARISON_CLASS_P (cond
))
9995 lhs
= TREE_OPERAND (cond
, 0);
9996 rhs
= TREE_OPERAND (cond
, 1);
9998 if (TREE_CODE (lhs
) == SSA_NAME
)
10000 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
10001 &lhs
, &slp_op
, &dts
[0], &vectype1
))
10004 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
10005 || TREE_CODE (lhs
) == FIXED_CST
)
10006 dts
[0] = vect_constant_def
;
10010 if (TREE_CODE (rhs
) == SSA_NAME
)
10012 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
10013 &rhs
, &slp_op
, &dts
[1], &vectype2
))
10016 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
10017 || TREE_CODE (rhs
) == FIXED_CST
)
10018 dts
[1] = vect_constant_def
;
10022 if (vectype1
&& vectype2
10023 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10024 TYPE_VECTOR_SUBPARTS (vectype2
)))
10027 *comp_vectype
= vectype1
? vectype1
: vectype2
;
10028 /* Invariant comparison. */
10029 if (! *comp_vectype
)
10031 tree scalar_type
= TREE_TYPE (lhs
);
10032 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10033 *comp_vectype
= truth_type_for (vectype
);
10036 /* If we can widen the comparison to match vectype do so. */
10037 if (INTEGRAL_TYPE_P (scalar_type
)
10039 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
10040 TYPE_SIZE (TREE_TYPE (vectype
))))
10041 scalar_type
= build_nonstandard_integer_type
10042 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
10043 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
10063 vectorizable_condition (vec_info
*vinfo
,
10064 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10066 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10068 tree scalar_dest
= NULL_TREE
;
10069 tree vec_dest
= NULL_TREE
;
10070 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
10071 tree then_clause
, else_clause
;
10072 tree comp_vectype
= NULL_TREE
;
10073 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
10074 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
10077 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10078 enum vect_def_type dts
[4]
10079 = {vect_unknown_def_type
, vect_unknown_def_type
,
10080 vect_unknown_def_type
, vect_unknown_def_type
};
10084 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10086 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10087 vec
<tree
> vec_oprnds0
= vNULL
;
10088 vec
<tree
> vec_oprnds1
= vNULL
;
10089 vec
<tree
> vec_oprnds2
= vNULL
;
10090 vec
<tree
> vec_oprnds3
= vNULL
;
10092 bool masked
= false;
10094 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10097 /* Is vectorizable conditional operation? */
10098 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10102 code
= gimple_assign_rhs_code (stmt
);
10103 if (code
!= COND_EXPR
)
10106 stmt_vec_info reduc_info
= NULL
;
10107 int reduc_index
= -1;
10108 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
10110 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
10113 if (STMT_SLP_TYPE (stmt_info
))
10115 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
10116 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
10117 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
10118 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
10119 || reduc_index
!= -1);
10123 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10127 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10128 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10133 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10137 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10141 gcc_assert (ncopies
>= 1);
10142 if (for_reduction
&& ncopies
> 1)
10143 return false; /* FORNOW */
10145 cond_expr
= gimple_assign_rhs1 (stmt
);
10147 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
10148 &comp_vectype
, &dts
[0], vectype
)
10152 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
10153 slp_tree then_slp_node
, else_slp_node
;
10154 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
10155 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
10157 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
10158 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
10161 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
10164 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
10167 masked
= !COMPARISON_CLASS_P (cond_expr
);
10168 vec_cmp_type
= truth_type_for (comp_vectype
);
10170 if (vec_cmp_type
== NULL_TREE
)
10173 cond_code
= TREE_CODE (cond_expr
);
10176 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
10177 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
10180 /* For conditional reductions, the "then" value needs to be the candidate
10181 value calculated by this iteration while the "else" value needs to be
10182 the result carried over from previous iterations. If the COND_EXPR
10183 is the other way around, we need to swap it. */
10184 bool must_invert_cmp_result
= false;
10185 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
10188 must_invert_cmp_result
= true;
10191 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
10192 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
10193 if (new_code
== ERROR_MARK
)
10194 must_invert_cmp_result
= true;
10197 cond_code
= new_code
;
10198 /* Make sure we don't accidentally use the old condition. */
10199 cond_expr
= NULL_TREE
;
10202 std::swap (then_clause
, else_clause
);
10205 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
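      /* Illustrative sketch, not from the original source: for a mask
	 comparison a_1 > b_2 on boolean vectors, "greater than" means
	 a_1 is true where b_2 is false, so with bitop1 = BIT_NOT_EXPR
	 and bitop2 = BIT_AND_EXPR the condition is emitted as

	   tmp_3 = ~b_2;
	   cmp_4 = a_1 & tmp_3;

	 following the rhs1 BITOP2 (BITOP1 rhs2) shape described above.  */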
10216 bitop1
= BIT_NOT_EXPR
;
10217 bitop2
= BIT_AND_EXPR
;
10220 bitop1
= BIT_NOT_EXPR
;
10221 bitop2
= BIT_IOR_EXPR
;
10224 bitop1
= BIT_NOT_EXPR
;
10225 bitop2
= BIT_AND_EXPR
;
10226 std::swap (cond_expr0
, cond_expr1
);
10229 bitop1
= BIT_NOT_EXPR
;
10230 bitop2
= BIT_IOR_EXPR
;
10231 std::swap (cond_expr0
, cond_expr1
);
10234 bitop1
= BIT_XOR_EXPR
;
10237 bitop1
= BIT_XOR_EXPR
;
10238 bitop2
= BIT_NOT_EXPR
;
10243 cond_code
= SSA_NAME
;
10246 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
10247 && reduction_type
== EXTRACT_LAST_REDUCTION
10248 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
10250 if (dump_enabled_p ())
10251 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10252 "reduction comparison operation not supported.\n");
10258 if (bitop1
!= NOP_EXPR
)
10260 machine_mode mode
= TYPE_MODE (comp_vectype
);
10263 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
10264 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10267 if (bitop2
!= NOP_EXPR
)
10269 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
10271 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10276 vect_cost_for_stmt kind
= vector_stmt
;
10277 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10278 /* Count one reduction-like operation per vector. */
10279 kind
= vec_to_scalar
;
10280 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
10284 && (!vect_maybe_update_slp_op_vectype
10285 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
10287 && !vect_maybe_update_slp_op_vectype
10288 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
10289 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
10290 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
10292 if (dump_enabled_p ())
10293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10294 "incompatible vector types for invariants\n");
10298 if (loop_vinfo
&& for_reduction
10299 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
10301 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10302 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
10303 ncopies
* vec_num
, vectype
, NULL
);
10304 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10305 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
10307 if (dump_enabled_p ())
10308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10309 "conditional reduction prevents the use"
10310 " of partial vectors.\n");
10311 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
10315 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
10316 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
10324 scalar_dest
= gimple_assign_lhs (stmt
);
10325 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10326 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
10328 bool swap_cond_operands
= false;
10330 /* See whether another part of the vectorized code applies a loop
10331 mask to the condition, or to its inverse. */
10333 vec_loop_masks
*masks
= NULL
;
10334 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
10336 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10337 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10340 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
10341 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10342 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10345 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
10346 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
10347 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
10349 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
10350 cond_code
= cond
.code
;
10351 swap_cond_operands
= true;
10357 /* Handle cond expr. */
10359 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10360 cond_expr
, &vec_oprnds0
, comp_vectype
,
10361 then_clause
, &vec_oprnds2
, vectype
,
10362 reduction_type
!= EXTRACT_LAST_REDUCTION
10363 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10365 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10366 cond_expr0
, &vec_oprnds0
, comp_vectype
,
10367 cond_expr1
, &vec_oprnds1
, comp_vectype
,
10368 then_clause
, &vec_oprnds2
, vectype
,
10369 reduction_type
!= EXTRACT_LAST_REDUCTION
10370 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
10372 /* Arguments are ready. Create the new vector stmt. */
10373 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
10375 vec_then_clause
= vec_oprnds2
[i
];
10376 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
10377 vec_else_clause
= vec_oprnds3
[i
];
10379 if (swap_cond_operands
)
10380 std::swap (vec_then_clause
, vec_else_clause
);
10383 vec_compare
= vec_cond_lhs
;
10386 vec_cond_rhs
= vec_oprnds1
[i
];
10387 if (bitop1
== NOP_EXPR
)
10389 gimple_seq stmts
= NULL
;
10390 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
10391 vec_cond_lhs
, vec_cond_rhs
);
10392 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
10396 new_temp
= make_ssa_name (vec_cmp_type
);
10398 if (bitop1
== BIT_NOT_EXPR
)
10399 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
10403 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
10405 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10406 if (bitop2
== NOP_EXPR
)
10407 vec_compare
= new_temp
;
10408 else if (bitop2
== BIT_NOT_EXPR
)
10410 /* Instead of doing ~x ? y : z do x ? z : y. */
10411 vec_compare
= new_temp
;
10412 std::swap (vec_then_clause
, vec_else_clause
);
10416 vec_compare
= make_ssa_name (vec_cmp_type
);
10418 = gimple_build_assign (vec_compare
, bitop2
,
10419 vec_cond_lhs
, new_temp
);
10420 vect_finish_stmt_generation (vinfo
, stmt_info
,
	  /* If we decided to apply a loop mask to the result of the vector
	     comparison, AND the comparison with the mask now.  Later passes
	     should then be able to reuse the AND results between multiple
	     vector statements.

	     For example:
	     for (int i = 0; i < 100; ++i)
	       x[i] = y[i] ? z[i] : 10;

	     results in the following optimized GIMPLE:

	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
					       vect_iftmp.11_47, { 10, ... }>;

	     instead of using masked and unmasked forms of
	     vec != { 0, ... } (masked in the MASK_LOAD,
	     unmasked in the VEC_COND_EXPR).  */
10448 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10449 in cases where that's necessary. */
10451 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
10453 if (!is_gimple_val (vec_compare
))
10455 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10456 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10458 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10459 vec_compare
= vec_compare_name
;
10462 if (must_invert_cmp_result
)
10464 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10465 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10468 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10469 vec_compare
= vec_compare_name
;
10475 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10477 tree tmp2
= make_ssa_name (vec_cmp_type
);
10479 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10481 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10482 vec_compare
= tmp2
;
10487 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10489 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10490 tree lhs
= gimple_get_lhs (old_stmt
);
10491 new_stmt
= gimple_build_call_internal
10492 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10494 gimple_call_set_lhs (new_stmt
, lhs
);
10495 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10496 if (old_stmt
== gsi_stmt (*gsi
))
10497 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10500 /* In this case we're moving the definition to later in the
10501 block. That doesn't matter because the only uses of the
10502 lhs are in phi statements. */
10503 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10504 gsi_remove (&old_gsi
, true);
10505 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10510 new_temp
= make_ssa_name (vec_dest
);
10511 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10512 vec_then_clause
, vec_else_clause
);
10513 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10516 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10518 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10522 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10524 vec_oprnds0
.release ();
10525 vec_oprnds1
.release ();
10526 vec_oprnds2
.release ();
10527 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
10541 vectorizable_comparison (vec_info
*vinfo
,
10542 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10544 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10546 tree lhs
, rhs1
, rhs2
;
10547 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10548 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10549 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10551 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10552 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10554 poly_uint64 nunits
;
10556 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10558 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10559 vec
<tree
> vec_oprnds0
= vNULL
;
10560 vec
<tree
> vec_oprnds1
= vNULL
;
10564 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10567 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10570 mask_type
= vectype
;
10571 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10576 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10578 gcc_assert (ncopies
>= 1);
10579 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10582 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10586 code
= gimple_assign_rhs_code (stmt
);
10588 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
10591 slp_tree slp_rhs1
, slp_rhs2
;
10592 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10593 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10596 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10597 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10600 if (vectype1
&& vectype2
10601 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10602 TYPE_VECTOR_SUBPARTS (vectype2
)))
10605 vectype
= vectype1
? vectype1
: vectype2
;
10607 /* Invariant comparison. */
10610 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10611 vectype
= mask_type
;
10613 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10615 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10618 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10621 /* Can't compare mask and non-mask types. */
10622 if (vectype1
&& vectype2
10623 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
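  /* Illustrative sketch, not from the original source: on boolean
     vectors the comparisons below map onto bit operations roughly as

	a != b  ->  a ^ b
	a == b  ->  ~(a ^ b)
	a >  b  ->  a & ~b
	a >= b  ->  a | ~b

     with LT/LE handled like GT/GE after swapping the operands
     (swap_p).  */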
10633 bool swap_p
= false;
10634 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10636 if (code
== GT_EXPR
)
10638 bitop1
= BIT_NOT_EXPR
;
10639 bitop2
= BIT_AND_EXPR
;
10641 else if (code
== GE_EXPR
)
10643 bitop1
= BIT_NOT_EXPR
;
10644 bitop2
= BIT_IOR_EXPR
;
10646 else if (code
== LT_EXPR
)
10648 bitop1
= BIT_NOT_EXPR
;
10649 bitop2
= BIT_AND_EXPR
;
10652 else if (code
== LE_EXPR
)
10654 bitop1
= BIT_NOT_EXPR
;
10655 bitop2
= BIT_IOR_EXPR
;
10660 bitop1
= BIT_XOR_EXPR
;
10661 if (code
== EQ_EXPR
)
10662 bitop2
= BIT_NOT_EXPR
;
10668 if (bitop1
== NOP_EXPR
)
10670 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10675 machine_mode mode
= TYPE_MODE (vectype
);
10678 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10679 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10682 if (bitop2
!= NOP_EXPR
)
10684 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10685 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10690 /* Put types on constant and invariant SLP children. */
10692 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10693 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10695 if (dump_enabled_p ())
10696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10697 "incompatible vector types for invariants\n");
10701 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10702 vect_model_simple_cost (vinfo
, stmt_info
,
10703 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10704 dts
, ndts
, slp_node
, cost_vec
);
10711 lhs
= gimple_assign_lhs (stmt
);
10712 mask
= vect_create_destination_var (lhs
, mask_type
);
10714 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10715 rhs1
, &vec_oprnds0
, vectype
,
10716 rhs2
, &vec_oprnds1
, vectype
);
10718 std::swap (vec_oprnds0
, vec_oprnds1
);
10720 /* Arguments are ready. Create the new vector stmt. */
10721 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10724 vec_rhs2
= vec_oprnds1
[i
];
10726 new_temp
= make_ssa_name (mask
);
10727 if (bitop1
== NOP_EXPR
)
10729 new_stmt
= gimple_build_assign (new_temp
, code
,
10730 vec_rhs1
, vec_rhs2
);
10731 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10735 if (bitop1
== BIT_NOT_EXPR
)
10736 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10738 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10740 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10741 if (bitop2
!= NOP_EXPR
)
10743 tree res
= make_ssa_name (mask
);
10744 if (bitop2
== BIT_NOT_EXPR
)
10745 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10747 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10753 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10755 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10759 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10761 vec_oprnds0
.release ();
10762 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
10773 can_vectorize_live_stmts (vec_info
*vinfo
,
10774 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10775 slp_tree slp_node
, slp_instance slp_node_instance
,
10777 stmt_vector_for_cost
*cost_vec
)
10781 stmt_vec_info slp_stmt_info
;
10783 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10785 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10786 && !vectorizable_live_operation (vinfo
,
10787 slp_stmt_info
, gsi
, slp_node
,
10788 slp_node_instance
, i
,
10789 vec_stmt_p
, cost_vec
))
10793 else if (STMT_VINFO_LIVE_P (stmt_info
)
10794 && !vectorizable_live_operation (vinfo
, stmt_info
, gsi
,
10795 slp_node
, slp_node_instance
, -1,
10796 vec_stmt_p
, cost_vec
))
10802 /* Make sure the statement is vectorizable. */
10805 vect_analyze_stmt (vec_info
*vinfo
,
10806 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10807 slp_tree node
, slp_instance node_instance
,
10808 stmt_vector_for_cost
*cost_vec
)
10810 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10811 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10813 gimple_seq pattern_def_seq
;
10815 if (dump_enabled_p ())
10816 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
10819 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10820 return opt_result::failure_at (stmt_info
->stmt
,
10822 " stmt has volatile operands: %G\n",
10825 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10827 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10829 gimple_stmt_iterator si
;
10831 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10833 stmt_vec_info pattern_def_stmt_info
10834 = vinfo
->lookup_stmt (gsi_stmt (si
));
10835 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10836 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10838 /* Analyze def stmt of STMT if it's a pattern stmt. */
10839 if (dump_enabled_p ())
10840 dump_printf_loc (MSG_NOTE
, vect_location
,
10841 "==> examining pattern def statement: %G",
10842 pattern_def_stmt_info
->stmt
);
10845 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10846 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statements need to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, so we don't analyze pattern stmts separately; the pattern
     stmts will already be part of some SLP instance.  */
10868 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10869 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10870 && !STMT_VINFO_LIVE_P (stmt_info
))
10872 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10873 && pattern_stmt_info
10874 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10875 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10877 /* Analyze PATTERN_STMT instead of the original stmt. */
10878 stmt_info
= pattern_stmt_info
;
10879 if (dump_enabled_p ())
10880 dump_printf_loc (MSG_NOTE
, vect_location
,
10881 "==> examining pattern statement: %G",
10886 if (dump_enabled_p ())
10887 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10889 return opt_result::success ();
10892 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10894 && pattern_stmt_info
10895 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10896 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10898 /* Analyze PATTERN_STMT too. */
10899 if (dump_enabled_p ())
10900 dump_printf_loc (MSG_NOTE
, vect_location
,
10901 "==> examining pattern statement: %G",
10902 pattern_stmt_info
->stmt
);
10905 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10906 node_instance
, cost_vec
);
10911 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10913 case vect_internal_def
:
10916 case vect_reduction_def
:
10917 case vect_nested_cycle
:
10918 gcc_assert (!bb_vinfo
10919 && (relevance
== vect_used_in_outer
10920 || relevance
== vect_used_in_outer_by_reduction
10921 || relevance
== vect_used_by_reduction
10922 || relevance
== vect_unused_in_scope
10923 || relevance
== vect_used_only_live
));
10926 case vect_induction_def
:
10927 gcc_assert (!bb_vinfo
);
10930 case vect_constant_def
:
10931 case vect_external_def
:
10932 case vect_unknown_def_type
:
10934 gcc_unreachable ();
10937 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10939 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
10941 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10943 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10944 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10945 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10946 *need_to_vectorize
= true;
10949 if (PURE_SLP_STMT (stmt_info
) && !node
)
10951 if (dump_enabled_p ())
10952 dump_printf_loc (MSG_NOTE
, vect_location
,
10953 "handled only by SLP analysis\n");
10954 return opt_result::success ();
10959 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10960 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10961 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10962 -mveclibabi= takes preference over library functions with
10963 the simd attribute. */
10964 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10965 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
10967 || vectorizable_conversion (vinfo
, stmt_info
,
10968 NULL
, NULL
, node
, cost_vec
)
10969 || vectorizable_operation (vinfo
, stmt_info
,
10970 NULL
, NULL
, node
, cost_vec
)
10971 || vectorizable_assignment (vinfo
, stmt_info
,
10972 NULL
, NULL
, node
, cost_vec
)
10973 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10974 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10975 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10976 node
, node_instance
, cost_vec
)
10977 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10978 NULL
, node
, cost_vec
)
10979 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10980 || vectorizable_condition (vinfo
, stmt_info
,
10981 NULL
, NULL
, node
, cost_vec
)
10982 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10984 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10985 stmt_info
, NULL
, node
));
10989 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10990 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
10991 NULL
, NULL
, node
, cost_vec
)
10992 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
10994 || vectorizable_shift (vinfo
, stmt_info
,
10995 NULL
, NULL
, node
, cost_vec
)
10996 || vectorizable_operation (vinfo
, stmt_info
,
10997 NULL
, NULL
, node
, cost_vec
)
10998 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
11000 || vectorizable_load (vinfo
, stmt_info
,
11001 NULL
, NULL
, node
, cost_vec
)
11002 || vectorizable_store (vinfo
, stmt_info
,
11003 NULL
, NULL
, node
, cost_vec
)
11004 || vectorizable_condition (vinfo
, stmt_info
,
11005 NULL
, NULL
, node
, cost_vec
)
11006 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
11008 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
));
11012 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
11015 return opt_result::failure_at (stmt_info
->stmt
,
11017 " relevant stmt not supported: %G",
  /* Stmts that are (also) "live" (i.e. used outside the loop)
     need extra handling, except for vectorizable reductions.  */
11023 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
11024 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
11025 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
11026 stmt_info
, NULL
, node
, node_instance
,
11028 return opt_result::failure_at (stmt_info
->stmt
,
11030 " live stmt not supported: %G",
11033 return opt_result::success ();
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11042 vect_transform_stmt (vec_info
*vinfo
,
11043 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
11044 slp_tree slp_node
, slp_instance slp_node_instance
)
11046 bool is_store
= false;
11047 gimple
*vec_stmt
= NULL
;
11050 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
11052 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11054 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
11056 switch (STMT_VINFO_TYPE (stmt_info
))
11058 case type_demotion_vec_info_type
:
11059 case type_promotion_vec_info_type
:
11060 case type_conversion_vec_info_type
:
11061 done
= vectorizable_conversion (vinfo
, stmt_info
,
11062 gsi
, &vec_stmt
, slp_node
, NULL
);
11066 case induc_vec_info_type
:
11067 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
11068 stmt_info
, &vec_stmt
, slp_node
,
11073 case shift_vec_info_type
:
11074 done
= vectorizable_shift (vinfo
, stmt_info
,
11075 gsi
, &vec_stmt
, slp_node
, NULL
);
11079 case op_vec_info_type
:
11080 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11085 case assignment_vec_info_type
:
11086 done
= vectorizable_assignment (vinfo
, stmt_info
,
11087 gsi
, &vec_stmt
, slp_node
, NULL
);
11091 case load_vec_info_type
:
11092 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
11097 case store_vec_info_type
:
11098 done
= vectorizable_store (vinfo
, stmt_info
,
11099 gsi
, &vec_stmt
, slp_node
, NULL
);
11101 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info must not be freed
	     meanwhile.  */
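	  /* Illustrative sketch, not from the original source: for an
	     interleaved group such as

	       a[4*i + 0] = w;
	       a[4*i + 1] = x;
	       a[4*i + 2] = y;
	       a[4*i + 3] = z;

	     the first three member stores only bump the group's store
	     count; code for the whole group is emitted when the count
	     reaches DR_GROUP_SIZE at the last member.  */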
11107 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
11108 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
11115 case condition_vec_info_type
:
11116 done
= vectorizable_condition (vinfo
, stmt_info
,
11117 gsi
, &vec_stmt
, slp_node
, NULL
);
11121 case comparison_vec_info_type
:
11122 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11127 case call_vec_info_type
:
11128 done
= vectorizable_call (vinfo
, stmt_info
,
11129 gsi
, &vec_stmt
, slp_node
, NULL
);
11132 case call_simd_clone_vec_info_type
:
11133 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
11137 case reduc_vec_info_type
:
11138 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11139 gsi
, &vec_stmt
, slp_node
);
11143 case cycle_phi_info_type
:
11144 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
11145 &vec_stmt
, slp_node
, slp_node_instance
);
11149 case lc_phi_info_type
:
11150 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
11151 stmt_info
, &vec_stmt
, slp_node
);
11155 case phi_info_type
:
11156 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
11161 if (!STMT_VINFO_LIVE_P (stmt_info
))
11163 if (dump_enabled_p ())
11164 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11165 "stmt not supported.\n");
11166 gcc_unreachable ();
11171 if (!slp_node
&& vec_stmt
)
11172 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
11174 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
11176 /* Handle stmts whose DEF is used outside the loop-nest that is
11177 being vectorized. */
11178 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, gsi
, slp_node
,
11179 slp_node_instance
, true, NULL
);
11184 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such types.

   If NUNITS is zero, return a vector type that contains elements of
   type SCALAR_TYPE, choosing whichever vector size the target prefers.

   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
   for this vectorization region and want to "autodetect" the best choice.
   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
   and we want the new type to be interoperable with it.  PREVAILING_MODE
   in this case can be a scalar integer mode or a vector mode; when it
   is a vector mode, the function acts like a tree-level version of
   related_vector_mode.  */
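/* Illustrative sketch, not from the original source, on a hypothetical
   target whose preferred vector size is 16 bytes:

     get_related_vectype_for_scalar_type (VOIDmode, integer_type_node, 0)
       could return a 4-element int vector chosen by the target hook;
     get_related_vectype_for_scalar_type (V4SImode, short_integer_type_node, 0)
       would ask for the short vector related to V4SImode, e.g. an
       8-element short vector;
     get_related_vectype_for_scalar_type (V4SImode, integer_type_node, 2)
       would ask for a 2-element int vector interoperable with V4SImode,
       or return null if the target has no such mode.  */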
11224 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
11225 tree scalar_type
, poly_uint64 nunits
)
11227 tree orig_scalar_type
= scalar_type
;
11228 scalar_mode inner_mode
;
11229 machine_mode simd_mode
;
11232 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
11233 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
11236 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
11244 if (INTEGRAL_TYPE_P (scalar_type
)
11245 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
11246 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
11247 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
11248 TYPE_UNSIGNED (scalar_type
));
  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test, simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
11254 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
11255 && !INTEGRAL_TYPE_P (scalar_type
))
11256 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
11258 /* We can't build a vector type of elements with alignment bigger than
11260 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
11261 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
11262 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
11266 if (scalar_type
== NULL_TREE
)
  /* If no prevailing mode was supplied, use the mode the target prefers.
     Otherwise look up a vector mode based on the prevailing mode.  */
11271 if (prevailing_mode
== VOIDmode
)
11273 gcc_assert (known_eq (nunits
, 0U));
11274 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
11275 if (SCALAR_INT_MODE_P (simd_mode
))
	  /* Traditional behavior is not to take the integer mode
	     literally, but simply to use it as a way of determining
	     the vector size.  It is up to mode_for_vector to decide
	     what the TYPE_MODE should be.

	     Note that nunits == 1 is allowed in order to support single
	     element vector types.  */
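	  /* Worked example, not from the original source: if the hook
	     returns the scalar integer mode TImode (16 bytes) and the
	     element mode is SImode (4 bytes), then

	       nunits = GET_MODE_SIZE (TImode) / GET_MODE_SIZE (SImode) = 4

	     and mode_for_vector (SImode, 4) decides the TYPE_MODE,
	     typically V4SImode when the target supports it.  */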
11284 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
11285 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11289 else if (SCALAR_INT_MODE_P (prevailing_mode
)
11290 || !related_vector_mode (prevailing_mode
,
11291 inner_mode
, nunits
).exists (&simd_mode
))
11293 /* Fall back to using mode_for_vector, mostly in the hope of being
11294 able to use an integer mode. */
11295 if (known_eq (nunits
, 0U)
11296 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
11299 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
11303 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
11305 /* In cases where the mode was chosen by mode_for_vector, check that
11306 the target actually supports the chosen mode, or that it at least
11307 allows the vector mode to be replaced by a like-sized integer. */
11308 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
11309 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
11314 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
11315 return build_qualified_type
11316 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */
11329 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
11330 unsigned int group_size
)
11332 /* For BB vectorization, we should always have a group size once we've
11333 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11334 are tentative requests during things like early data reference
11335 analysis and pattern recognition. */
11336 if (is_a
<bb_vec_info
> (vinfo
))
11337 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11341 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11343 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
11344 vinfo
->vector_mode
= TYPE_MODE (vectype
);
11346 /* Register the natural choice of vector type, before the group size
11347 has been applied. */
11349 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
11351 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11352 try again with an explicit number of elements. */
11355 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
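      /* Worked example, not from the original source: for group_size == 6
	 the loop below starts with

	   nunits = 1 << floor_log2 (6) = 4

	 and asks for a 4-element vector; if that fails it retries with
	 nunits == 2 before giving up, so a 2-lane vector may still cover
	 part of the group.  */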
11369 unsigned int nunits
= 1 << floor_log2 (group_size
);
11372 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
11373 scalar_type
, nunits
);
11376 while (nunits
> 1 && !vectype
);
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					       scalar_type, nunits);
}
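/* Worked example, not from the original source: with SCALAR_TYPE double
   and a 16-byte VECTOR_TYPE of floats, nunits is 16 / 8 = 2, so a
   2-element vector of doubles is requested from
   get_related_vectype_for_scalar_type.  */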
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
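/* Illustrative usage sketch, not from the original source: a typical
   caller checks each operand of a candidate stmt along the lines of

     enum vect_def_type dt;
     stmt_vec_info def_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info))
       return false;   // operand defined in an unsupported way

   and then dispatches on DT (vect_constant_def, vect_external_def,
   vect_internal_def, ...) to decide how the vectorized operand is
   obtained.  */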
11470 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11471 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11473 if (def_stmt_info_out
)
11474 *def_stmt_info_out
= NULL
;
11476 *def_stmt_out
= NULL
;
11477 *dt
= vect_unknown_def_type
;
11479 if (dump_enabled_p ())
11481 dump_printf_loc (MSG_NOTE
, vect_location
,
11482 "vect_is_simple_use: operand ");
11483 if (TREE_CODE (operand
) == SSA_NAME
11484 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11485 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11487 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11490 if (CONSTANT_CLASS_P (operand
))
11491 *dt
= vect_constant_def
;
11492 else if (is_gimple_min_invariant (operand
))
11493 *dt
= vect_external_def
;
11494 else if (TREE_CODE (operand
) != SSA_NAME
)
11495 *dt
= vect_unknown_def_type
;
11496 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11497 *dt
= vect_external_def
;
11500 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11501 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11503 *dt
= vect_external_def
;
11506 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11507 def_stmt
= stmt_vinfo
->stmt
;
11508 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11509 if (def_stmt_info_out
)
11510 *def_stmt_info_out
= stmt_vinfo
;
11513 *def_stmt_out
= def_stmt
;
11516 if (dump_enabled_p ())
11518 dump_printf (MSG_NOTE
, ", type of def: ");
11521 case vect_uninitialized_def
:
11522 dump_printf (MSG_NOTE
, "uninitialized\n");
11524 case vect_constant_def
:
11525 dump_printf (MSG_NOTE
, "constant\n");
11527 case vect_external_def
:
11528 dump_printf (MSG_NOTE
, "external\n");
11530 case vect_internal_def
:
11531 dump_printf (MSG_NOTE
, "internal\n");
11533 case vect_induction_def
:
11534 dump_printf (MSG_NOTE
, "induction\n");
11536 case vect_reduction_def
:
11537 dump_printf (MSG_NOTE
, "reduction\n");
11539 case vect_double_reduction_def
:
11540 dump_printf (MSG_NOTE
, "double reduction\n");
11542 case vect_nested_cycle
:
11543 dump_printf (MSG_NOTE
, "nested cycle\n");
11545 case vect_unknown_def_type
:
11546 dump_printf (MSG_NOTE
, "unknown\n");
11551 if (*dt
== vect_unknown_def_type
)
11553 if (dump_enabled_p ())
11554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11555 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */
11572 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11573 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11574 gimple
**def_stmt_out
)
11576 stmt_vec_info def_stmt_info
;
11578 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11582 *def_stmt_out
= def_stmt
;
11583 if (def_stmt_info_out
)
11584 *def_stmt_info_out
= def_stmt_info
;
11586 /* Now get a vector type if the def is internal, otherwise supply
11587 NULL_TREE and leave it up to the caller to figure out a proper
11588 type for the use stmt. */
11589 if (*dt
== vect_internal_def
11590 || *dt
== vect_induction_def
11591 || *dt
== vect_reduction_def
11592 || *dt
== vect_double_reduction_def
11593 || *dt
== vect_nested_cycle
)
11595 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11596 gcc_assert (*vectype
!= NULL_TREE
);
11597 if (dump_enabled_p ())
11598 dump_printf_loc (MSG_NOTE
, vect_location
,
11599 "vect_is_simple_use: vectype %T\n", *vectype
);
11601 else if (*dt
== vect_uninitialized_def
11602 || *dt
== vect_constant_def
11603 || *dt
== vect_external_def
)
11604 *vectype
= NULL_TREE
;
11606 gcc_unreachable ();
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	  return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	}
      else
	{
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
	  return true;
	}
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  if (gimple_assign_rhs_code (ass) == COND_EXPR
	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    {
	      if (operand < 2)
		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	      else
		*op = gimple_op (ass, operand);
	    }
	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	  else
	    *op = gimple_op (ass, operand + 1);
	}
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
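/* Operand numbering sketch for the overload above (illustrative only):
   given a conditional assignment with an embedded comparison, e.g.

     x_1 = p_2 < q_3 ? y_4 : z_5;

   OPERAND 0 selects p_2 and OPERAND 1 selects q_3 (the comparison
   operands), while OPERANDs 2 and 3 select y_4 and z_5 via gimple_op.
   For a plain assignment such as x_1 = a_2 + b_3, OPERAND 0 selects a_2
   and OPERAND 1 selects b_3 (gimple_op with OPERAND + 1, skipping the
   lhs).  */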
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (vec_info *vinfo,
				enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    case WIDEN_PLUS_EXPR:
      c1 = VEC_WIDEN_PLUS_LO_EXPR;
      c2 = VEC_WIDEN_PLUS_HI_EXPR;
      break;

    case WIDEN_MINUS_EXPR:
      c1 = VEC_WIDEN_MINUS_LO_EXPR;
      c2 = VEC_WIDEN_MINUS_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code)
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
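/* A plain C sketch (illustrative only, not part of the vectorizer) of
   the two result layouts discussed in the WIDEN_MULT_EXPR case above,
   for a widening multiply of eight short elements into two vectors of
   four ints each:

     void
     widen_mult_layouts (const short *a, const short *b,
			 int lo[4], int hi[4], int even[4], int odd[4])
     {
       // lo/hi layout: results keep the original element order.
       for (int i = 0; i < 4; i++)
	 {
	   lo[i] = (int) a[i] * b[i];			// res1..res4
	   hi[i] = (int) a[i + 4] * b[i + 4];		// res5..res8
	 }
       // even/odd layout: results are interleaved, which is only
       // acceptable when the consumer is a reduction and the order of
       // the elements does not matter.
       for (int i = 0; i < 4; i++)
	 {
	   even[i] = (int) a[2 * i] * b[2 * i];		// res1,res3,res5,res7
	   odd[i] = (int) a[2 * i + 1] * b[2 * i + 1];	// res2,res4,res6,res8
	 }
     }
 */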
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
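/* A plain C sketch (illustrative only) of the multi-step narrowing
   described above: converting int elements to char goes through an
   intermediate short step, which is the shape that yields
   MULTI_STEP_CVT == 1 with short as the single entry in INTERM_TYPES:

     void
     narrow_int_to_char (const int *in, signed char *out, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   short mid = (short) in[i];		// first packing step
	   out[i] = (signed char) mid;		// second packing step
	 }
     }
 */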
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
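/* Scalar reference semantics of the mask built above (an illustrative
   sketch only; the real code emits a single IFN_WHILE_ULT call):

     void
     while_ult_reference (unsigned int start_index, unsigned int end_index,
			  unsigned char *mask, unsigned int nunits)
     {
       for (unsigned int i = 0; i < nunits; i++)
	 {
	   // Lane I is active iff J + START_INDEX < END_INDEX for every
	   // J <= I, exactly as documented above.
	   unsigned char active = 1;
	   for (unsigned int j = 0; j <= i; j++)
	     if (!(start_index + j < end_index))
	       active = 0;
	   mask[i] = active;
	 }
     }
 */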
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
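/* Illustrative example for the two output types above (a sketch, not
   target-specific): for a widening conversion statement such as

     int_val = (int) char_val;

   *STMT_VECTYPE_OUT is based on the int result, whereas the smallest
   scalar type involved is char, so *NUNITS_VECTYPE_OUT is the vector
   type chosen for char, and it is the one that determines the number
   of units per vector iteration.  */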
/* Generate and return statement sequence that sets vector length LEN that is:

     min_of_start_and_end = min (START_INDEX, END_INDEX);
     left_len = END_INDEX - min_of_start_and_end;
     rhs = min (left_len, LEN_LIMIT);
     LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
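/* A plain C sketch of the computation above (illustrative only, using
   unsigned int in place of the actual length type):

     unsigned int
     gen_len_reference (unsigned int start_index, unsigned int end_index,
			unsigned int len_limit)
     {
       unsigned int m = start_index < end_index ? start_index : end_index;
       unsigned int left_len = end_index - m;
       return left_len < len_limit ? left_len : len_limit;
     }
 */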
tree
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);