/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
/* Says whether a statement is a load, a store of a vectorized statement
   result, or a store of an invariant value.  */
/* NOTE(review): the enumerator list was dropped by extraction; the values
   below are reconstructed from the standard GCC definition — confirm against
   upstream tree-vect-stmts.c.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
65 /* Return the vectorized type for the given statement. */
68 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
70 return STMT_VINFO_VECTYPE (stmt_info
);
73 /* Return TRUE iff the given statement is in an inner loop relative to
74 the loop being vectorized. */
76 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
78 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
79 basic_block bb
= gimple_bb (stmt
);
80 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
86 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
88 return (bb
->loop_father
== loop
->inner
);
91 /* Record the cost of a statement, either by directly informing the
92 target model or by saving it in a vector for later processing.
93 Return a preliminary estimate of the statement's cost. */
96 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
97 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
98 int misalign
, enum vect_cost_model_location where
)
100 if ((kind
== vector_load
|| kind
== unaligned_load
)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
102 kind
= vector_gather_load
;
103 if ((kind
== vector_store
|| kind
== unaligned_store
)
104 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
105 kind
= vector_scatter_store
;
108 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
109 stmt_info_for_cost si
= { count
, kind
,
110 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
112 body_cost_vec
->safe_push (si
);
114 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
117 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
118 count
, kind
, stmt_info
, misalign
, where
);
121 /* Return a variable of type ELEM_TYPE[NELEMS]. */
124 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
126 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
130 /* ARRAY is an array of vectors created by create_vector_array.
131 Return an SSA_NAME for the vector in index N. The reference
132 is part of the vectorization of STMT and the vector is associated
133 with scalar destination SCALAR_DEST. */
136 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
137 tree array
, unsigned HOST_WIDE_INT n
)
139 tree vect_type
, vect
, vect_name
, array_ref
;
142 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
143 vect_type
= TREE_TYPE (TREE_TYPE (array
));
144 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
145 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
146 build_int_cst (size_type_node
, n
),
147 NULL_TREE
, NULL_TREE
);
149 new_stmt
= gimple_build_assign (vect
, array_ref
);
150 vect_name
= make_ssa_name (vect
, new_stmt
);
151 gimple_assign_set_lhs (new_stmt
, vect_name
);
152 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
157 /* ARRAY is an array of vectors created by create_vector_array.
158 Emit code to store SSA_NAME VECT in index N of the array.
159 The store is part of the vectorization of STMT. */
162 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
163 tree array
, unsigned HOST_WIDE_INT n
)
168 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
169 build_int_cst (size_type_node
, n
),
170 NULL_TREE
, NULL_TREE
);
172 new_stmt
= gimple_build_assign (array_ref
, vect
);
173 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
176 /* PTR is a pointer to an array of type TYPE. Return a representation
177 of *PTR. The memory reference replaces those in FIRST_DR
181 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
185 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
198 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
199 enum vect_relevant relevant
, bool live_p
)
201 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
202 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
203 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
204 gimple
*pattern_stmt
;
206 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE
, vect_location
,
209 "mark relevant %d, live %d: ", relevant
, live_p
);
210 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
213 /* If this stmt is an original stmt in a pattern, we might need to mark its
214 related pattern stmt instead of the original stmt. However, such stmts
215 may have their own uses that are not in any pattern, in such cases the
216 stmt itself should be marked. */
217 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
219 /* This is the last stmt in a sequence that was detected as a
220 pattern that can potentially be vectorized. Don't mark the stmt
221 as relevant/live because it's not going to be vectorized.
222 Instead mark the pattern-stmt that replaces it. */
224 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
226 if (dump_enabled_p ())
227 dump_printf_loc (MSG_NOTE
, vect_location
,
228 "last stmt in pattern. don't mark"
229 " relevant/live.\n");
230 stmt_info
= vinfo_for_stmt (pattern_stmt
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
237 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
238 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
239 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
241 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
242 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE
, vect_location
,
246 "already marked relevant/live.\n");
250 worklist
->safe_push (stmt
);
254 /* Function is_simple_and_all_uses_invariant
256 Return true if STMT is simple and all uses of it are invariant. */
259 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
265 if (!is_gimple_assign (stmt
))
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt
)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
314 != loop_exit_ctrl_vec_info_type
)
315 *relevant
= vect_used_in_scope
;
317 /* changing memory. */
318 if (gimple_code (stmt
) != GIMPLE_PHI
)
319 if (gimple_vdef (stmt
)
320 && !gimple_clobber_p (stmt
))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE
, vect_location
,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant
= vect_used_in_scope
;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
331 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
333 basic_block bb
= gimple_bb (USE_STMT (use_p
));
334 if (!flow_bb_inside_loop_p (loop
, bb
))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE
, vect_location
,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p
)))
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
346 gcc_assert (bb
== single_exit (loop
)->dest
);
353 if (*live_p
&& *relevant
== vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE
, vect_location
,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant
= vect_used_only_live
;
362 return (*live_p
|| *relevant
);
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
372 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
375 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info
))
383 /* STMT has a data_ref. FORNOW this means that its of one of
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt
))
398 if (is_gimple_call (stmt
)
399 && gimple_call_internal_p (stmt
))
400 switch (gimple_call_internal_fn (stmt
))
403 operand
= gimple_call_arg (stmt
, 3);
408 operand
= gimple_call_arg (stmt
, 2);
418 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
420 operand
= gimple_assign_rhs1 (stmt
);
421 if (TREE_CODE (operand
) != SSA_NAME
)
432 Function process_use.
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT cause it had already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
459 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
460 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
463 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
464 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
465 stmt_vec_info dstmt_vinfo
;
466 basic_block bb
, def_bb
;
468 enum vect_def_type dt
;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
475 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
479 "not vectorized: unsupported use in stmt.\n");
483 if (!def_stmt
|| gimple_nop_p (def_stmt
))
486 def_bb
= gimple_bb (def_stmt
);
487 if (!flow_bb_inside_loop_p (loop
, def_bb
))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
500 bb
= gimple_bb (stmt
);
501 if (gimple_code (stmt
) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
503 && gimple_code (def_stmt
) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
505 && bb
->loop_father
== def_bb
->loop_father
)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE
, vect_location
,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
511 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
525 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
533 case vect_unused_in_scope
:
534 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
535 vect_used_in_scope
: vect_unused_in_scope
;
538 case vect_used_in_outer_by_reduction
:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
540 relevant
= vect_used_by_reduction
;
543 case vect_used_in_outer
:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
545 relevant
= vect_used_in_scope
;
548 case vect_used_in_scope
:
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
563 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE
, vect_location
,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
571 case vect_unused_in_scope
:
572 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
574 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
577 case vect_used_by_reduction
:
578 case vect_used_only_live
:
579 relevant
= vect_used_in_outer_by_reduction
;
582 case vect_used_in_scope
:
583 relevant
= vect_used_in_outer
;
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
594 else if (gimple_code (stmt
) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE
, vect_location
,
602 "induction value on backedge.\n");
607 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
631 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
632 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
633 unsigned int nbbs
= loop
->num_nodes
;
634 gimple_stmt_iterator si
;
637 stmt_vec_info stmt_vinfo
;
641 enum vect_relevant relevant
;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE
, vect_location
,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec
<gimple
*, 64> worklist
;
649 /* 1. Init worklist. */
650 for (i
= 0; i
< nbbs
; i
++)
653 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
662 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
663 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
665 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
667 stmt
= gsi_stmt (si
);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
674 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
675 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
679 /* 2. Process_worklist */
680 while (worklist
.length () > 0)
685 stmt
= worklist
.pop ();
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
695 stmt_vinfo
= vinfo_for_stmt (stmt
);
696 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
711 case vect_reduction_def
:
712 gcc_assert (relevant
!= vect_unused_in_scope
);
713 if (relevant
!= vect_unused_in_scope
714 && relevant
!= vect_used_in_scope
715 && relevant
!= vect_used_by_reduction
716 && relevant
!= vect_used_only_live
)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
720 "unsupported use of reduction.\n");
725 case vect_nested_cycle
:
726 if (relevant
!= vect_unused_in_scope
727 && relevant
!= vect_used_in_outer_by_reduction
728 && relevant
!= vect_used_in_outer
)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
732 "unsupported use of nested cycle.\n");
738 case vect_double_reduction_def
:
739 if (relevant
!= vect_unused_in_scope
740 && relevant
!= vect_used_by_reduction
741 && relevant
!= vect_used_only_live
)
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
745 "unsupported use of double reduction.\n");
755 if (is_pattern_stmt_p (stmt_vinfo
))
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt
))
762 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
763 tree op
= gimple_assign_rhs1 (stmt
);
766 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
768 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
769 relevant
, &worklist
, false)
770 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
771 relevant
, &worklist
, false))
775 for (; i
< gimple_num_ops (stmt
); i
++)
777 op
= gimple_op (stmt
, i
);
778 if (TREE_CODE (op
) == SSA_NAME
779 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
784 else if (is_gimple_call (stmt
))
786 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
788 tree arg
= gimple_call_arg (stmt
, i
);
789 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
796 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
798 tree op
= USE_FROM_PTR (use_p
);
799 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
806 gather_scatter_info gs_info
;
807 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
809 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
813 } /* while worklist */
819 /* Function vect_model_simple_cost.
821 Models cost for simple operations, i.e. those that only emit ncopies of a
822 single op. Right now, this does not account for multiple insns that could
823 be generated for the single vector op. We will handle that shortly. */
826 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
827 enum vect_def_type
*dt
,
829 stmt_vector_for_cost
*prologue_cost_vec
,
830 stmt_vector_for_cost
*body_cost_vec
)
833 int inside_cost
= 0, prologue_cost
= 0;
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info
))
839 /* Cost the "broadcast" of a scalar operand in to a vector operand.
840 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
842 for (i
= 0; i
< ndts
; i
++)
843 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
844 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
845 stmt_info
, 0, vect_prologue
);
847 /* Pass the inside-of-loop statements to the target-specific cost model. */
848 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
849 stmt_info
, 0, vect_body
);
851 if (dump_enabled_p ())
852 dump_printf_loc (MSG_NOTE
, vect_location
,
853 "vect_model_simple_cost: inside_cost = %d, "
854 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
858 /* Model cost for type demotion and promotion operations. PWR is normally
859 zero for single-step promotions and demotions. It will be one if
860 two-step promotion/demotion is required, and so on. Each additional
861 step doubles the number of instructions required. */
864 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
865 enum vect_def_type
*dt
, int pwr
)
868 int inside_cost
= 0, prologue_cost
= 0;
869 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
870 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
871 void *target_cost_data
;
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info
))
878 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
880 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
882 for (i
= 0; i
< pwr
+ 1; i
++)
884 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
886 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
887 vec_promote_demote
, stmt_info
, 0,
891 /* FORNOW: Assuming maximum 2 args per stmts. */
892 for (i
= 0; i
< 2; i
++)
893 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
894 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
895 stmt_info
, 0, vect_prologue
);
897 if (dump_enabled_p ())
898 dump_printf_loc (MSG_NOTE
, vect_location
,
899 "vect_model_promotion_demotion_cost: inside_cost = %d, "
900 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
909 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
910 vect_memory_access_type memory_access_type
,
911 enum vect_def_type dt
, slp_tree slp_node
,
912 stmt_vector_for_cost
*prologue_cost_vec
,
913 stmt_vector_for_cost
*body_cost_vec
)
915 unsigned int inside_cost
= 0, prologue_cost
= 0;
916 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
917 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
918 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
920 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
921 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
922 stmt_info
, 0, vect_prologue
);
924 /* Grouped stores update all elements in the group at once,
925 so we want the DR for the first statement. */
926 if (!slp_node
&& grouped_access_p
)
928 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
929 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
932 /* True if we should include any once-per-group costs as well as
933 the cost of the statement itself. For SLP we only get called
934 once per group anyhow. */
935 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
937 /* We assume that the cost of a single store-lanes instruction is
938 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
939 access is instead being provided by a permute-and-store operation,
940 include the cost of the permutes. */
942 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
944 /* Uses a high and low interleave or shuffle operations for each
946 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
947 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
948 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
949 stmt_info
, 0, vect_body
);
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE
, vect_location
,
953 "vect_model_store_cost: strided group_size = %d .\n",
957 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
958 /* Costs of the stores. */
959 if (memory_access_type
== VMAT_ELEMENTWISE
960 || memory_access_type
== VMAT_GATHER_SCATTER
)
961 /* N scalar stores plus extracting the elements. */
962 inside_cost
+= record_stmt_cost (body_cost_vec
,
963 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
964 scalar_store
, stmt_info
, 0, vect_body
);
966 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
968 if (memory_access_type
== VMAT_ELEMENTWISE
969 || memory_access_type
== VMAT_STRIDED_SLP
)
970 inside_cost
+= record_stmt_cost (body_cost_vec
,
971 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
972 vec_to_scalar
, stmt_info
, 0, vect_body
);
974 if (dump_enabled_p ())
975 dump_printf_loc (MSG_NOTE
, vect_location
,
976 "vect_model_store_cost: inside_cost = %d, "
977 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
981 /* Calculate cost of DR's memory access. */
983 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
984 unsigned int *inside_cost
,
985 stmt_vector_for_cost
*body_cost_vec
)
987 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
988 gimple
*stmt
= DR_STMT (dr
);
989 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
991 switch (alignment_support_scheme
)
995 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
996 vector_store
, stmt_info
, 0,
999 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_NOTE
, vect_location
,
1001 "vect_model_store_cost: aligned.\n");
1005 case dr_unaligned_supported
:
1007 /* Here, we assign an additional cost for the unaligned store. */
1008 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1009 unaligned_store
, stmt_info
,
1010 DR_MISALIGNMENT (dr
), vect_body
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: unaligned supported by "
1018 case dr_unaligned_unsupported
:
1020 *inside_cost
= VECT_MAX_COST
;
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1024 "vect_model_store_cost: unsupported access.\n");
1034 /* Function vect_model_load_cost
1036 Models cost for loads. In the case of grouped accesses, one access has
1037 the overhead of the grouped access attributed to it. Since unaligned
1038 accesses are supported for loads, we also account for the costs of the
1039 access scheme chosen. */
1042 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1043 vect_memory_access_type memory_access_type
,
1045 stmt_vector_for_cost
*prologue_cost_vec
,
1046 stmt_vector_for_cost
*body_cost_vec
)
1048 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1049 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1050 unsigned int inside_cost
= 0, prologue_cost
= 0;
1051 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1053 /* Grouped loads read all elements in the group at once,
1054 so we want the DR for the first statement. */
1055 if (!slp_node
&& grouped_access_p
)
1057 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1058 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1061 /* True if we should include any once-per-group costs as well as
1062 the cost of the statement itself. For SLP we only get called
1063 once per group anyhow. */
1064 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1066 /* We assume that the cost of a single load-lanes instruction is
1067 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1068 access is instead being provided by a load-and-permute operation,
1069 include the cost of the permutes. */
1071 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1073 /* Uses an even and odd extract operations or shuffle operations
1074 for each needed permute. */
1075 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1076 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1077 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1078 stmt_info
, 0, vect_body
);
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_NOTE
, vect_location
,
1082 "vect_model_load_cost: strided group_size = %d .\n",
1086 /* The loads themselves. */
1087 if (memory_access_type
== VMAT_ELEMENTWISE
1088 || memory_access_type
== VMAT_GATHER_SCATTER
)
1090 /* N scalar loads plus gathering them into a vector. */
1091 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1092 inside_cost
+= record_stmt_cost (body_cost_vec
,
1093 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1094 scalar_load
, stmt_info
, 0, vect_body
);
1097 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1098 &inside_cost
, &prologue_cost
,
1099 prologue_cost_vec
, body_cost_vec
, true);
1100 if (memory_access_type
== VMAT_ELEMENTWISE
1101 || memory_access_type
== VMAT_STRIDED_SLP
)
1102 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1103 stmt_info
, 0, vect_body
);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE
, vect_location
,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* Calculate cost of DR's memory access.

   DR is the data reference being loaded, NCOPIES the number of vector
   copies required.  Costs are accumulated into *INSIDE_COST (per-iteration
   body cost) and *PROLOGUE_COST (one-off setup cost), recording individual
   entries into BODY_COST_VEC and PROLOGUE_COST_VEC respectively.
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS gate whether the one-time
   realignment setup for the software-pipelined scheme is charged (it should
   be charged only once per group of loads).  */

void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* Each supported alignment scheme has a different cost profile.  */
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	/* Aligned access: just the plain vector loads.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.
	   The misalignment value lets the target refine the estimate.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	/* Two loads (below and above the misaligned address) plus a
	   permute to combine them, per copy.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	/* Steady-state cost: one load and one realignment permute.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	/* Make vectorization look maximally expensive so it is rejected.  */
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.

   When GSI is null the statement is invariant setup code: for loop
   vectorization it is inserted on the preheader edge (of the outer loop
   when STMT sits in a nested inner loop); for basic-block vectorization
   it is inserted at the start of the block, after any labels.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  /* For a stmt in a nested loop, hoist only to the inner loop's
	     preheader.  */
	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  /* The preheader edge must not have required splitting.  */
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  /* Non-constant boolean: emit a COND_EXPR selecting the
		     canonical all-ones/all-zeros element value.  */
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      /* Non-constant scalar of a different type: convert it to the
		 element type first.  Non-integral values are reinterpreted
		 via VIEW_CONVERT_EXPR rather than converted.  */
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      /* Splat the (now element-typed) scalar across the vector.  */
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  If DEF_STMT was replaced by a
	   pattern and is itself not relevant, the vectorized def lives on
	   the related pattern stmt instead.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	/* Extract the defined SSA name from whatever stmt kind defines it.  */
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  /* Classify OP; analysis has already validated it, so this must succeed.  */
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      /* Invariant operand: materialize a vector of it in the preheader.  */
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	/* Scalar booleans need a boolean vector type matching the stmt's
	   vector type in size.  */
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  /* Step from the stmt defining VEC_OPRND to its next-copy stmt.  */
  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1546 vec
<tree
> *vec_oprnds0
,
1547 vec
<tree
> *vec_oprnds1
)
1549 tree vec_oprnd
= vec_oprnds0
->pop ();
1551 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1552 vec_oprnds0
->quick_push (vec_oprnd
);
1554 if (vec_oprnds1
&& vec_oprnds1
->length ())
1556 vec_oprnd
= vec_oprnds1
->pop ();
1557 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1558 vec_oprnds1
->quick_push (vec_oprnd
);
/* Get vectorized definitions for OP0 and OP1.

   For SLP (SLP_NODE non-null) the defs come from the SLP tree via
   vect_get_slp_defs; otherwise a single def per operand is obtained
   through vect_get_vec_def_for_operand.  OP1 may be NULL_TREE for
   unary operations, in which case VEC_OPRNDS1 is left untouched.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      /* Hand the def vectors over to the caller (ownership transfers).  */
      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt VEC_STMT before *GSI, on behalf of scalar stmt STMT.
   Besides the insertion this records stmt_vec_info for VEC_STMT, copies
   STMT's source location, keeps virtual SSA form up to date for stores,
   and preserves EH landing-pad membership for potentially-throwing stmts.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  /* The new stmt uses the same virtual operand as the stmt we
	     insert before.  */
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      /* Give the new store its own vdef and re-link the following
	         stmt's vuse to it, keeping the virtual use-def chain valid.  */
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1670 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1671 tree vectype_out
, tree vectype_in
)
1674 if (internal_fn_p (cfn
))
1675 ifn
= as_internal_fn (cfn
);
1677 ifn
= associated_internal_fn (fndecl
);
1678 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1680 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1681 if (info
.vectorizable
)
1683 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1684 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1685 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1686 OPTIMIZE_FOR_SPEED
))
/* Forward declaration; the definition appears later in this file.
   Used by the load/store analysis and transform code below.  */
static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1702 compare_step_with_zero (gimple
*stmt
)
1704 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1705 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1706 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  Three leading elements
     (nunits-1, nunits-2, nunits-3) are enough for vec_perm_indices to
     extrapolate the full descending series.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  /* Only hand back masks the target can actually execute.  */
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.

   Return true and set *MEMORY_ACCESS_TYPE on success, false if the
   access cannot be vectorized.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
			   vec_load_store_type vls_type,
			   vect_memory_access_type *memory_access_type)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
			   && !GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
	{
	  /* Try to use consecutive accesses of GROUP_SIZE elements,
	     separated by the stride, until we have a complete vector.
	     Fall back to scalar accesses if that isn't possible.  */
	  if (nunits % group_size == 0)
	    *memory_access_type = VMAT_STRIDED_SLP;
	  else
	    *memory_access_type = VMAT_ELEMENTWISE;
	}
      else
	{
	  overrun_p = loop_vinfo && gap != 0;
	  if (overrun_p && vls_type != VLS_LOAD)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Grouped store with gaps requires"
			       " non-consecutive accesses\n");
	      return false;
	    }
	  /* An overrun is fine if the trailing elements are smaller
	     than the alignment boundary B.  Every vector access will
	     be a multiple of B and so we are guaranteed to access a
	     non-gap element in the same B-sized block.  */
	  if (overrun_p
	      && gap < (vect_known_alignment_in_bytes (first_dr)
			/ vect_get_scalar_dr_size (first_dr)))
	    overrun_p = false;
	  if (overrun_p && !can_overrun_p)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Peeling for outer loop is not supported\n");
	      return false;
	    }
	  *memory_access_type = VMAT_CONTIGUOUS;
	}
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
	 but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
	 would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
	 alignment boundary B.  Every vector access will be a multiple of B
	 and so we are guaranteed to access a non-gap element in the
	 same B-sized block.  */
      if (would_overrun_p
	  && gap < (vect_known_alignment_in_bytes (first_dr)
		    / vect_get_scalar_dr_size (first_dr)))
	would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
	  && (can_overrun_p || !would_overrun_p)
	  && compare_step_with_zero (stmt) > 0)
	{
	  /* First try using LOAD/STORE_LANES.  */
	  if (vls_type == VLS_LOAD
	      ? vect_load_lanes_supported (vectype, group_size)
	      : vect_store_lanes_supported (vectype, group_size))
	    {
	      *memory_access_type = VMAT_LOAD_STORE_LANES;
	      overrun_p = would_overrun_p;
	    }

	  /* If that fails, try using permuting loads.  */
	  if (*memory_access_type == VMAT_ELEMENTWISE
	      && (vls_type == VLS_LOAD
		  ? vect_grouped_load_supported (vectype, single_element_p,
						 group_size)
		  : vect_grouped_store_supported (vectype, group_size)))
	    {
	      *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
	      overrun_p = would_overrun_p;
	    }
	}
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
	 stmts of the group.  */
      gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
	{
	  gcc_assert (gimple_assign_single_p (next_stmt));
	  tree op = gimple_assign_rhs1 (next_stmt);
	  gimple *def_stmt;
	  enum vect_def_type dt;
	  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "use not simple.\n");
	      return false;
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	}
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Data access with gaps requires scalar "
			 "epilogue loop\n");
      /* Request a final scalar iteration so the overrun never executes.  */
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.

   Returns the memory access type to use; VMAT_ELEMENTWISE is the
   conservative fallback when the reversed contiguous forms cannot
   be used.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
			      vec_load_store_type vls_type,
			      unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  /* Multiple vector copies with a negative step are not handled by the
     contiguous-reverse scheme.  */
  if (ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  /* Storing an invariant needs no element reversal at all.  */
  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "negative step with invariant source;"
			 " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  /* Otherwise a reversing permute is required; check the target has one.  */
  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp,
		     vec_load_store_type vls_type, unsigned int ncopies,
		     vect_memory_access_type *memory_access_type,
		     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      *memory_access_type = VMAT_GATHER_SCATTER;
      /* Analysis already proved a gather/scatter exists, so this call
	 cannot fail here.  */
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
	gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
				    &gs_info->offset_dt,
				    &gs_info->offset_vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "%s index use not simple.\n",
			     vls_type == VLS_LOAD ? "gather" : "scatter");
	  return false;
	}
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
				      memory_access_type))
	return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      /* Plain consecutive access: classify by the sign of the step.  */
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
	*memory_access_type = get_negative_load_store_type
	  (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
	{
	  /* Zero step means an invariant access, which only makes sense
	     for loads.  */
	  gcc_assert (vls_type == VLS_LOAD);
	  *memory_access_type = VMAT_INVARIANT;
	}
      else
	*memory_access_type = VMAT_CONTIGUOUS;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
2017 /* Function vectorizable_mask_load_store.
2019 Check if STMT performs a conditional load or store that can be vectorized.
2020 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2021 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2022 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2025 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2026 gimple
**vec_stmt
, slp_tree slp_node
)
2028 tree vec_dest
= NULL
;
2029 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2030 stmt_vec_info prev_stmt_info
;
2031 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2032 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2033 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2034 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2035 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2036 tree rhs_vectype
= NULL_TREE
;
2041 tree dataref_ptr
= NULL_TREE
;
2043 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2047 gather_scatter_info gs_info
;
2048 vec_load_store_type vls_type
;
2051 enum vect_def_type dt
;
2053 if (slp_node
!= NULL
)
2056 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2057 gcc_assert (ncopies
>= 1);
2059 mask
= gimple_call_arg (stmt
, 2);
2061 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2064 /* FORNOW. This restriction should be relaxed. */
2065 if (nested_in_vect_loop
&& ncopies
> 1)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2069 "multiple types in nested loop.");
2073 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
2076 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2080 if (!STMT_VINFO_DATA_REF (stmt_info
))
2083 elem_type
= TREE_TYPE (vectype
);
2085 if (TREE_CODE (mask
) != SSA_NAME
)
2088 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
2092 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2094 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
)
2095 || TYPE_VECTOR_SUBPARTS (mask_vectype
) != TYPE_VECTOR_SUBPARTS (vectype
))
2098 if (gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2100 tree rhs
= gimple_call_arg (stmt
, 3);
2101 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
2103 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
2104 vls_type
= VLS_STORE_INVARIANT
;
2106 vls_type
= VLS_STORE
;
2109 vls_type
= VLS_LOAD
;
2111 vect_memory_access_type memory_access_type
;
2112 if (!get_load_store_type (stmt
, vectype
, false, vls_type
, ncopies
,
2113 &memory_access_type
, &gs_info
))
2116 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2118 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2120 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
2121 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2123 if (dump_enabled_p ())
2124 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2125 "masked gather with integer mask not supported.");
2129 else if (memory_access_type
!= VMAT_CONTIGUOUS
)
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2133 "unsupported access type for masked %s.\n",
2134 vls_type
== VLS_LOAD
? "load" : "store");
2137 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
2138 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
2139 TYPE_MODE (mask_vectype
),
2140 vls_type
== VLS_LOAD
)
2142 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
2145 if (!vec_stmt
) /* transformation not required. */
2147 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
2148 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2149 if (vls_type
== VLS_LOAD
)
2150 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
2153 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
2154 dt
, NULL
, NULL
, NULL
);
2157 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
2161 if (memory_access_type
== VMAT_GATHER_SCATTER
)
2163 tree vec_oprnd0
= NULL_TREE
, op
;
2164 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
2165 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
2166 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
2167 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
2168 tree mask_perm_mask
= NULL_TREE
;
2169 edge pe
= loop_preheader_edge (loop
);
2172 enum { NARROW
, NONE
, WIDEN
} modifier
;
2173 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
2175 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
2176 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2177 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2178 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2179 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2180 scaletype
= TREE_VALUE (arglist
);
2181 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2182 && types_compatible_p (srctype
, masktype
));
2184 if (nunits
== gather_off_nunits
)
2186 else if (nunits
== gather_off_nunits
/ 2)
2190 vec_perm_builder
sel (gather_off_nunits
, gather_off_nunits
, 1);
2191 for (i
= 0; i
< gather_off_nunits
; ++i
)
2192 sel
.quick_push (i
| nunits
);
2194 vec_perm_indices
indices (sel
, 1, gather_off_nunits
);
2195 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
2198 else if (nunits
== gather_off_nunits
* 2)
2202 vec_perm_builder
sel (nunits
, nunits
, 1);
2203 sel
.quick_grow (nunits
);
2204 for (i
= 0; i
< nunits
; ++i
)
2205 sel
[i
] = i
< gather_off_nunits
2206 ? i
: i
+ nunits
- gather_off_nunits
;
2207 vec_perm_indices
indices (sel
, 2, nunits
);
2208 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2212 for (i
= 0; i
< nunits
; ++i
)
2213 sel
[i
] = i
| gather_off_nunits
;
2214 indices
.new_vector (sel
, 2, gather_off_nunits
);
2215 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2220 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2222 ptr
= fold_convert (ptrtype
, gs_info
.base
);
2223 if (!is_gimple_min_invariant (ptr
))
2225 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2226 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2227 gcc_assert (!new_bb
);
2230 scale
= build_int_cst (scaletype
, gs_info
.scale
);
2232 prev_stmt_info
= NULL
;
2233 for (j
= 0; j
< ncopies
; ++j
)
2235 if (modifier
== WIDEN
&& (j
& 1))
2236 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2237 perm_mask
, stmt
, gsi
);
2240 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
2243 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
2245 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2247 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
2248 == TYPE_VECTOR_SUBPARTS (idxtype
));
2249 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2250 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2252 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2253 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2257 if (mask_perm_mask
&& (j
& 1))
2258 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2259 mask_perm_mask
, stmt
, gsi
);
2263 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2266 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2267 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2271 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2273 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2274 == TYPE_VECTOR_SUBPARTS (masktype
));
2275 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2276 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2278 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2279 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2285 = gimple_build_call (gs_info
.decl
, 5, mask_op
, ptr
, op
, mask_op
,
2288 if (!useless_type_conversion_p (vectype
, rettype
))
2290 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2291 == TYPE_VECTOR_SUBPARTS (rettype
));
2292 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2293 gimple_call_set_lhs (new_stmt
, op
);
2294 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2295 var
= make_ssa_name (vec_dest
);
2296 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2297 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2301 var
= make_ssa_name (vec_dest
, new_stmt
);
2302 gimple_call_set_lhs (new_stmt
, var
);
2305 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2307 if (modifier
== NARROW
)
2314 var
= permute_vec_elements (prev_res
, var
,
2315 perm_mask
, stmt
, gsi
);
2316 new_stmt
= SSA_NAME_DEF_STMT (var
);
2319 if (prev_stmt_info
== NULL
)
2320 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2322 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2323 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2326 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2328 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2330 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2331 stmt_info
= vinfo_for_stmt (stmt
);
2333 tree lhs
= gimple_call_lhs (stmt
);
2334 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2335 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2336 set_vinfo_for_stmt (stmt
, NULL
);
2337 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2338 gsi_replace (gsi
, new_stmt
, true);
2341 else if (vls_type
!= VLS_LOAD
)
2343 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2344 prev_stmt_info
= NULL
;
2345 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
2346 for (i
= 0; i
< ncopies
; i
++)
2348 unsigned align
, misalign
;
2352 tree rhs
= gimple_call_arg (stmt
, 3);
2353 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2354 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2356 /* We should have catched mismatched types earlier. */
2357 gcc_assert (useless_type_conversion_p (vectype
,
2358 TREE_TYPE (vec_rhs
)));
2359 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2360 NULL_TREE
, &dummy
, gsi
,
2361 &ptr_incr
, false, &inv_p
);
2362 gcc_assert (!inv_p
);
2366 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2367 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2368 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2369 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2370 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2371 TYPE_SIZE_UNIT (vectype
));
2374 align
= DR_TARGET_ALIGNMENT (dr
);
2375 if (aligned_access_p (dr
))
2377 else if (DR_MISALIGNMENT (dr
) == -1)
2379 align
= TYPE_ALIGN_UNIT (elem_type
);
2383 misalign
= DR_MISALIGNMENT (dr
);
2384 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2386 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2387 misalign
? least_bit_hwi (misalign
) : align
);
2389 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2390 ptr
, vec_mask
, vec_rhs
);
2391 gimple_call_set_nothrow (call
, true);
2393 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2395 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2397 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2398 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2403 tree vec_mask
= NULL_TREE
;
2404 prev_stmt_info
= NULL
;
2405 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2406 for (i
= 0; i
< ncopies
; i
++)
2408 unsigned align
, misalign
;
2412 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
2414 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2415 NULL_TREE
, &dummy
, gsi
,
2416 &ptr_incr
, false, &inv_p
);
2417 gcc_assert (!inv_p
);
2421 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2422 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2423 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2424 TYPE_SIZE_UNIT (vectype
));
2427 align
= DR_TARGET_ALIGNMENT (dr
);
2428 if (aligned_access_p (dr
))
2430 else if (DR_MISALIGNMENT (dr
) == -1)
2432 align
= TYPE_ALIGN_UNIT (elem_type
);
2436 misalign
= DR_MISALIGNMENT (dr
);
2437 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2439 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2440 misalign
? least_bit_hwi (misalign
) : align
);
2442 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2444 gimple_call_set_lhs (call
, make_ssa_name (vec_dest
));
2445 gimple_call_set_nothrow (call
, true);
2446 vect_finish_stmt_generation (stmt
, call
, gsi
);
2448 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= call
;
2450 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = call
;
2451 prev_stmt_info
= vinfo_for_stmt (call
);
2455 if (vls_type
== VLS_LOAD
)
2457 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2459 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2461 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2462 stmt_info
= vinfo_for_stmt (stmt
);
2464 tree lhs
= gimple_call_lhs (stmt
);
2465 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2466 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2467 set_vinfo_for_stmt (stmt
, NULL
);
2468 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2469 gsi_replace (gsi
, new_stmt
, true);
2475 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2478 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2479 gimple
**vec_stmt
, slp_tree slp_node
,
2480 tree vectype_in
, enum vect_def_type
*dt
)
2483 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2484 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2485 unsigned ncopies
, nunits
;
2487 op
= gimple_call_arg (stmt
, 0);
2488 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2489 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2491 /* Multiple types in SLP are handled by creating the appropriate number of
2492 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2497 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2499 gcc_assert (ncopies
>= 1);
2501 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2505 unsigned int num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2506 unsigned word_bytes
= num_bytes
/ nunits
;
2508 /* The encoding uses one stepped pattern for each byte in the word. */
2509 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2510 for (unsigned i
= 0; i
< 3; ++i
)
2511 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2512 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2514 vec_perm_indices
indices (elts
, 1, num_bytes
);
2515 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2520 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2524 if (! PURE_SLP_STMT (stmt_info
))
2526 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2527 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2528 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2529 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2534 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2537 vec
<tree
> vec_oprnds
= vNULL
;
2538 gimple
*new_stmt
= NULL
;
2539 stmt_vec_info prev_stmt_info
= NULL
;
2540 for (unsigned j
= 0; j
< ncopies
; j
++)
2544 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2546 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2548 /* Arguments are ready. create the new vector stmt. */
2551 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2553 tree tem
= make_ssa_name (char_vectype
);
2554 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2555 char_vectype
, vop
));
2556 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2557 tree tem2
= make_ssa_name (char_vectype
);
2558 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2559 tem
, tem
, bswap_vconst
);
2560 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2561 tem
= make_ssa_name (vectype
);
2562 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2564 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2566 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2573 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2577 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2580 vec_oprnds
.release ();
/* NOTE(review): garbled fragment of simple_integer_narrowing; some
   lines (the early `return false;`s, final `return true;`) are
   missing from this extraction.  */
2584 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2585 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2586 in a single step. On success, store the binary pack code in
2590 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2591 tree_code
*convert_code
)
/* Only integer element types qualify.  */
2593 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2594 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
/* Ask the target whether a single-step narrowing exists; a non-zero
   multi_step_cvt would mean more than one step is needed.  */
2598 int multi_step_cvt
= 0;
2599 auto_vec
<tree
, 8> interm_types
;
2600 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2601 &code
, &multi_step_cvt
,
/* Success: report the pack code to the caller.  */
2606 *convert_code
= code
;
/* NOTE(review): extraction-garbled fragment of vectorizable_call.
   Statements are split across lines and many originals (braces,
   returns, declarations) are absent; annotations below describe only
   what is visible.  Recover the pristine file before modifying.  */
2610 /* Function vectorizable_call.
2612 Check if GS performs a function call that can be vectorized.
2613 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2614 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2615 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2618 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
/* Local state: vector operands, per-stmt info, in/out vector types.  */
2625 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2626 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2627 tree vectype_out
, vectype_in
;
2630 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2631 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2632 vec_info
*vinfo
= stmt_info
->vinfo
;
2633 tree fndecl
, new_temp
, rhs_type
;
/* Def kinds for up to three call arguments.  */
2635 enum vect_def_type dt
[3]
2636 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2638 gimple
*new_stmt
= NULL
;
2640 vec
<tree
> vargs
= vNULL
;
/* modifier: relation of output to input lane counts (narrow/none/widen).  */
2641 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* Early bail-outs: irrelevant stmts and non-internal defs.  */
2645 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2648 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2652 /* Is GS a vectorizable call? */
2653 stmt
= dyn_cast
<gcall
*> (gs
);
/* Masked loads/stores are handled by a dedicated routine.  */
2657 if (gimple_call_internal_p (stmt
)
2658 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2659 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2660 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2663 if (gimple_call_lhs (stmt
) == NULL_TREE
2664 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2667 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2669 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2671 /* Process function arguments. */
2672 rhs_type
= NULL_TREE
;
2673 vectype_in
= NULL_TREE
;
2674 nargs
= gimple_call_num_args (stmt
);
2676 /* Bail out if the function has more than three arguments, we do not have
2677 interesting builtin functions to vectorize with more than two arguments
2678 except for fma. No arguments is also not good. */
2679 if (nargs
== 0 || nargs
> 3)
2682 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2683 if (gimple_call_internal_p (stmt
)
2684 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2687 rhs_type
= unsigned_type_node
;
/* Check every argument is a "simple use" with a common scalar type and a
   common vector type.  */
2690 for (i
= 0; i
< nargs
; i
++)
2694 op
= gimple_call_arg (stmt
, i
);
2696 /* We can only handle calls with arguments of the same type. */
2698 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2700 if (dump_enabled_p ())
2701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2702 "argument types differ.\n");
2706 rhs_type
= TREE_TYPE (op
);
2708 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2710 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2712 "use not simple.\n");
2717 vectype_in
= opvectype
;
2719 && opvectype
!= vectype_in
)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2723 "argument vector types differ.\n");
2727 /* If all arguments are external or constant defs use a vector type with
2728 the same size as the output vector type. */
2730 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2732 gcc_assert (vectype_in
);
2735 if (dump_enabled_p ())
2737 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2738 "no vectype for scalar type ");
2739 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2740 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify modifier from the in/out lane-count ratio (the assignments to
   `modifier` are among the missing lines).  */
2747 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2748 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2749 if (nunits_in
== nunits_out
/ 2)
2751 else if (nunits_out
== nunits_in
)
2753 else if (nunits_out
== nunits_in
/ 2)
2758 /* We only handle functions that do not read or clobber memory. */
2759 if (gimple_vuse (stmt
))
2761 if (dump_enabled_p ())
2762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2763 "function reads from or writes to memory.\n");
2767 /* For now, we only vectorize functions if a target specific builtin
2768 is available. TODO -- in some cases, it might be profitable to
2769 insert the calls for pieces of the vector, in order to be able
2770 to vectorize other operations in the loop. */
2772 internal_fn ifn
= IFN_LAST
;
2773 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2774 tree callee
= gimple_call_fndecl (stmt
);
2776 /* First try using an internal function. */
2777 tree_code convert_code
= ERROR_MARK
;
2779 && (modifier
== NONE
2780 || (modifier
== NARROW
2781 && simple_integer_narrowing (vectype_out
, vectype_in
,
2783 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2786 /* If that fails, try asking for a target-specific built-in function. */
2787 if (ifn
== IFN_LAST
)
2789 if (cfn
!= CFN_LAST
)
2790 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2791 (cfn
, vectype_out
, vectype_in
);
2793 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2794 (callee
, vectype_out
, vectype_in
);
/* Neither an internal fn nor a builtin: special-case GOMP_SIMD_LANE and
   the bswap builtins before giving up.  */
2797 if (ifn
== IFN_LAST
&& !fndecl
)
2799 if (cfn
== CFN_GOMP_SIMD_LANE
2802 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2803 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2804 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2805 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2807 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2808 { 0, 1, 2, ... vf - 1 } vector. */
2809 gcc_assert (nargs
== 0);
2811 else if (modifier
== NONE
2812 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
2813 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
2814 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
2815 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
2819 if (dump_enabled_p ())
2820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2821 "function is not vectorizable.\n");
/* Number of vector copies needed for this scalar stmt.  */
2828 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2829 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
2831 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
2833 /* Sanity check: make sure that at least one copy of the vectorized stmt
2834 needs to be generated. */
2835 gcc_assert (ncopies
>= 1);
2837 if (!vec_stmt
) /* transformation not required. */
2839 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2840 if (dump_enabled_p ())
2841 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2843 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
2844 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2845 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2846 vec_promote_demote
, stmt_info
, 0, vect_body
);
/* Transformation: create the vector destination and generate copies.  */
2853 if (dump_enabled_p ())
2854 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2857 scalar_dest
= gimple_call_lhs (stmt
);
2858 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2860 prev_stmt_info
= NULL
;
/* Case 1: no modifier, or an internal fn that narrows in one step.  */
2861 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2863 tree prev_res
= NULL_TREE
;
2864 for (j
= 0; j
< ncopies
; ++j
)
2866 /* Build argument list for the vectorized call. */
2868 vargs
.create (nargs
);
/* SLP path: fetch all defs per argument, emit one call per SLP lane.  */
2874 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2875 vec
<tree
> vec_oprnds0
;
2877 for (i
= 0; i
< nargs
; i
++)
2878 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2879 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
2880 vec_oprnds0
= vec_defs
[0];
2882 /* Arguments are ready. Create the new vector stmt. */
2883 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2886 for (k
= 0; k
< nargs
; k
++)
2888 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2889 vargs
[k
] = vec_oprndsk
[i
];
/* NARROW: two half-width results are combined with convert_code.  */
2891 if (modifier
== NARROW
)
2893 tree half_res
= make_ssa_name (vectype_in
);
2895 = gimple_build_call_internal_vec (ifn
, vargs
);
2896 gimple_call_set_lhs (call
, half_res
);
2897 gimple_call_set_nothrow (call
, true);
2899 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2902 prev_res
= half_res
;
2905 new_temp
= make_ssa_name (vec_dest
);
2906 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2907 prev_res
, half_res
);
2912 if (ifn
!= IFN_LAST
)
2913 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2915 call
= gimple_build_call_vec (fndecl
, vargs
);
2916 new_temp
= make_ssa_name (vec_dest
, call
);
2917 gimple_call_set_lhs (call
, new_temp
);
2918 gimple_call_set_nothrow (call
, true);
2921 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2922 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2925 for (i
= 0; i
< nargs
; i
++)
2927 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2928 vec_oprndsi
.release ();
/* Non-SLP path: first copy fetches operand defs, later copies chain
   from the previous copy.  */
2933 for (i
= 0; i
< nargs
; i
++)
2935 op
= gimple_call_arg (stmt
, i
);
2938 = vect_get_vec_def_for_operand (op
, stmt
);
2941 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2943 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2946 vargs
.quick_push (vec_oprnd0
);
/* GOMP_SIMD_LANE: materialize { j*nunits, j*nunits+1, ... }.  */
2949 if (gimple_call_internal_p (stmt
)
2950 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2952 tree_vector_builder
v (vectype_out
, 1, 3);
2953 for (int k
= 0; k
< 3; ++k
)
2954 v
.quick_push (build_int_cst (unsigned_type_node
,
2955 j
* nunits_out
+ k
));
2956 tree cst
= v
.build ();
2958 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2959 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2960 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2961 new_temp
= make_ssa_name (vec_dest
);
2962 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2964 else if (modifier
== NARROW
)
2966 tree half_res
= make_ssa_name (vectype_in
);
2967 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
2968 gimple_call_set_lhs (call
, half_res
);
2969 gimple_call_set_nothrow (call
, true);
2971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2974 prev_res
= half_res
;
2977 new_temp
= make_ssa_name (vec_dest
);
2978 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2979 prev_res
, half_res
);
2984 if (ifn
!= IFN_LAST
)
2985 call
= gimple_build_call_internal_vec (ifn
, vargs
);
2987 call
= gimple_build_call_vec (fndecl
, vargs
);
2988 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2989 gimple_call_set_lhs (call
, new_temp
);
2990 gimple_call_set_nothrow (call
, true);
2993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Chain the copies (NARROW produces one result per two copies).  */
2995 if (j
== (modifier
== NARROW
? 1 : 0))
2996 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2998 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3000 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Case 2: NARROW with a target builtin — each vector call consumes two
   input vectors.  */
3003 else if (modifier
== NARROW
)
3005 for (j
= 0; j
< ncopies
; ++j
)
3007 /* Build argument list for the vectorized call. */
3009 vargs
.create (nargs
* 2);
3015 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3016 vec
<tree
> vec_oprnds0
;
3018 for (i
= 0; i
< nargs
; i
++)
3019 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3020 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3021 vec_oprnds0
= vec_defs
[0];
3023 /* Arguments are ready. Create the new vector stmt. */
3024 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3028 for (k
= 0; k
< nargs
; k
++)
3030 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3031 vargs
.quick_push (vec_oprndsk
[i
]);
3032 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3035 if (ifn
!= IFN_LAST
)
3036 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3038 call
= gimple_build_call_vec (fndecl
, vargs
);
3039 new_temp
= make_ssa_name (vec_dest
, call
);
3040 gimple_call_set_lhs (call
, new_temp
);
3041 gimple_call_set_nothrow (call
, true);
3043 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3044 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3047 for (i
= 0; i
< nargs
; i
++)
3049 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3050 vec_oprndsi
.release ();
/* Non-SLP: push two defs per argument for each call.  */
3055 for (i
= 0; i
< nargs
; i
++)
3057 op
= gimple_call_arg (stmt
, i
);
3061 = vect_get_vec_def_for_operand (op
, stmt
);
3063 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3067 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3069 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3071 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3074 vargs
.quick_push (vec_oprnd0
);
3075 vargs
.quick_push (vec_oprnd1
);
3078 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3079 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3080 gimple_call_set_lhs (new_stmt
, new_temp
);
3081 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3084 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3086 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3088 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3091 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3094 /* No current target implements this case. */
3099 /* The call in STMT might prevent it from being removed in dce.
3100 We however cannot remove it here, due to the way the ssa name
3101 it defines is mapped to the new definition. So just replace
3102 rhs of the statement with something harmless. */
3107 type
= TREE_TYPE (scalar_dest
)
;
3108 if (is_pattern_stmt_p (stmt_info
))
3109 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3111 lhs
= gimple_call_lhs (stmt
);
3113 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3114 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3115 set_vinfo_for_stmt (stmt
, NULL
);
3116 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3117 gsi_replace (gsi
, new_stmt
, false);
/* NOTE(review): partial field list — other members of this struct
   (e.g. the operand/vectype fields referenced by
   vect_simd_lane_linear and vectorizable_simd_clone_call) are
   missing from this extraction.  */
3123 struct simd_call_arg_info
/* Step of the linear progression of this argument, 0 if not linear.  */
3127 HOST_WIDE_INT linear_step
;
/* How the argument is defined (constant, external, induction, ...).  */
3128 enum vect_def_type dt
;
/* True if the argument is linear within a simd lane only.  */
3130 bool simd_lane_linear
;
/* NOTE(review): garbled fragment of vect_simd_lane_linear; switch-case
   labels, `return`s and closing braces are missing from this
   extraction.  Comments describe the visible logic only.  */
3133 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3134 is linear within simd lane (but not within whole loop), note it in
3138 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3139 struct simd_call_arg_info
*arginfo
)
3141 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only consider OP of the form invariant_base p+ offset.  */
3143 if (!is_gimple_assign (def_stmt
)
3144 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3145 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3148 tree base
= gimple_assign_rhs1 (def_stmt
);
3149 HOST_WIDE_INT linear_step
= 0;
3150 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the offset's def chain, peeling constant adds, one constant
   multiply (the step) and widening conversions.  */
3151 while (TREE_CODE (v
) == SSA_NAME
)
3154 def_stmt
= SSA_NAME_DEF_STMT (v
);
3155 if (is_gimple_assign (def_stmt
))
3156 switch (gimple_assign_rhs_code (def_stmt
))
/* Constant addend: fold it into the base.  */
3159 t
= gimple_assign_rhs2 (def_stmt
);
3160 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3162 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3163 v
= gimple_assign_rhs1 (def_stmt
);
/* Constant multiplier: at most one, and it becomes the step.  */
3166 t
= gimple_assign_rhs2 (def_stmt
);
3167 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3169 linear_step
= tree_to_shwi (t
);
3170 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversion: only look through non-narrowing integer converts.  */
3173 t
= gimple_assign_rhs1 (def_stmt
);
3174 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3175 || (TYPE_PRECISION (TREE_TYPE (v
))
3176 < TYPE_PRECISION (TREE_TYPE (t
))))
/* Chain roots at this loop's GOMP_SIMD_LANE: OP is linear per lane.  */
3185 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3187 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3188 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Record findings in ARGINFO (default step is 1 if none was seen —
   TODO confirm against pristine source).  */
3193 arginfo
->linear_step
= linear_step
;
3195 arginfo
->simd_lane_linear
= true;
3201 /* Function vectorizable_simd_clone_call.
3203 Check if STMT performs a function call that can be vectorized
3204 by calling a simd clone of the function.
3205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3210 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3211 gimple
**vec_stmt
, slp_tree slp_node
)
3216 tree vec_oprnd0
= NULL_TREE
;
3217 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3219 unsigned int nunits
;
3220 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3221 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3222 vec_info
*vinfo
= stmt_info
->vinfo
;
3223 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3224 tree fndecl
, new_temp
;
3226 gimple
*new_stmt
= NULL
;
3228 auto_vec
<simd_call_arg_info
> arginfo
;
3229 vec
<tree
> vargs
= vNULL
;
3231 tree lhs
, rtype
, ratype
;
3232 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3234 /* Is STMT a vectorizable call? */
3235 if (!is_gimple_call (stmt
))
3238 fndecl
= gimple_call_fndecl (stmt
);
3239 if (fndecl
== NULL_TREE
)
3242 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3243 if (node
== NULL
|| node
->simd_clones
== NULL
)
3246 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3249 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3253 if (gimple_call_lhs (stmt
)
3254 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3257 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3259 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3261 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3268 /* Process function arguments. */
3269 nargs
= gimple_call_num_args (stmt
);
3271 /* Bail out if the function has zero arguments. */
3275 arginfo
.reserve (nargs
, true);
3277 for (i
= 0; i
< nargs
; i
++)
3279 simd_call_arg_info thisarginfo
;
3282 thisarginfo
.linear_step
= 0;
3283 thisarginfo
.align
= 0;
3284 thisarginfo
.op
= NULL_TREE
;
3285 thisarginfo
.simd_lane_linear
= false;
3287 op
= gimple_call_arg (stmt
, i
);
3288 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3289 &thisarginfo
.vectype
)
3290 || thisarginfo
.dt
== vect_uninitialized_def
)
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3294 "use not simple.\n");
3298 if (thisarginfo
.dt
== vect_constant_def
3299 || thisarginfo
.dt
== vect_external_def
)
3300 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3302 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3304 /* For linear arguments, the analyze phase should have saved
3305 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3306 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3307 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3309 gcc_assert (vec_stmt
);
3310 thisarginfo
.linear_step
3311 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3313 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3314 thisarginfo
.simd_lane_linear
3315 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3316 == boolean_true_node
);
3317 /* If loop has been peeled for alignment, we need to adjust it. */
3318 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3319 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3320 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3322 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3323 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3324 tree opt
= TREE_TYPE (thisarginfo
.op
);
3325 bias
= fold_convert (TREE_TYPE (step
), bias
);
3326 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3328 = fold_build2 (POINTER_TYPE_P (opt
)
3329 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3330 thisarginfo
.op
, bias
);
3334 && thisarginfo
.dt
!= vect_constant_def
3335 && thisarginfo
.dt
!= vect_external_def
3337 && TREE_CODE (op
) == SSA_NAME
3338 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3340 && tree_fits_shwi_p (iv
.step
))
3342 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3343 thisarginfo
.op
= iv
.base
;
3345 else if ((thisarginfo
.dt
== vect_constant_def
3346 || thisarginfo
.dt
== vect_external_def
)
3347 && POINTER_TYPE_P (TREE_TYPE (op
)))
3348 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3349 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3351 if (POINTER_TYPE_P (TREE_TYPE (op
))
3352 && !thisarginfo
.linear_step
3354 && thisarginfo
.dt
!= vect_constant_def
3355 && thisarginfo
.dt
!= vect_external_def
3358 && TREE_CODE (op
) == SSA_NAME
)
3359 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3361 arginfo
.quick_push (thisarginfo
);
3364 unsigned int badness
= 0;
3365 struct cgraph_node
*bestn
= NULL
;
3366 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3367 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3369 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3370 n
= n
->simdclone
->next_clone
)
3372 unsigned int this_badness
= 0;
3373 if (n
->simdclone
->simdlen
3374 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
3375 || n
->simdclone
->nargs
!= nargs
)
3377 if (n
->simdclone
->simdlen
3378 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3379 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
3380 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3381 if (n
->simdclone
->inbranch
)
3382 this_badness
+= 2048;
3383 int target_badness
= targetm
.simd_clone
.usable (n
);
3384 if (target_badness
< 0)
3386 this_badness
+= target_badness
* 512;
3387 /* FORNOW: Have to add code to add the mask argument. */
3388 if (n
->simdclone
->inbranch
)
3390 for (i
= 0; i
< nargs
; i
++)
3392 switch (n
->simdclone
->args
[i
].arg_type
)
3394 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3395 if (!useless_type_conversion_p
3396 (n
->simdclone
->args
[i
].orig_type
,
3397 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3399 else if (arginfo
[i
].dt
== vect_constant_def
3400 || arginfo
[i
].dt
== vect_external_def
3401 || arginfo
[i
].linear_step
)
3404 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3405 if (arginfo
[i
].dt
!= vect_constant_def
3406 && arginfo
[i
].dt
!= vect_external_def
)
3409 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3410 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3411 if (arginfo
[i
].dt
== vect_constant_def
3412 || arginfo
[i
].dt
== vect_external_def
3413 || (arginfo
[i
].linear_step
3414 != n
->simdclone
->args
[i
].linear_step
))
3417 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3418 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3419 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3420 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3421 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3422 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3426 case SIMD_CLONE_ARG_TYPE_MASK
:
3429 if (i
== (size_t) -1)
3431 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3436 if (arginfo
[i
].align
)
3437 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3438 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3440 if (i
== (size_t) -1)
3442 if (bestn
== NULL
|| this_badness
< badness
)
3445 badness
= this_badness
;
3452 for (i
= 0; i
< nargs
; i
++)
3453 if ((arginfo
[i
].dt
== vect_constant_def
3454 || arginfo
[i
].dt
== vect_external_def
)
3455 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3458 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3460 if (arginfo
[i
].vectype
== NULL
3461 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3462 > bestn
->simdclone
->simdlen
))
3466 fndecl
= bestn
->decl
;
3467 nunits
= bestn
->simdclone
->simdlen
;
3468 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3470 /* If the function isn't const, only allow it in simd loops where user
3471 has asserted that at least nunits consecutive iterations can be
3472 performed using SIMD instructions. */
3473 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3474 && gimple_vuse (stmt
))
3477 /* Sanity check: make sure that at least one copy of the vectorized stmt
3478 needs to be generated. */
3479 gcc_assert (ncopies
>= 1);
3481 if (!vec_stmt
) /* transformation not required. */
3483 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3484 for (i
= 0; i
< nargs
; i
++)
3485 if ((bestn
->simdclone
->args
[i
].arg_type
3486 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3487 || (bestn
->simdclone
->args
[i
].arg_type
3488 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3490 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3492 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3493 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3494 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3495 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3496 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3497 tree sll
= arginfo
[i
].simd_lane_linear
3498 ? boolean_true_node
: boolean_false_node
;
3499 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3501 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3502 if (dump_enabled_p ())
3503 dump_printf_loc (MSG_NOTE
, vect_location
,
3504 "=== vectorizable_simd_clone_call ===\n");
3505 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3515 scalar_dest
= gimple_call_lhs (stmt
);
3516 vec_dest
= NULL_TREE
;
3521 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3522 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3523 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3526 rtype
= TREE_TYPE (ratype
);
3530 prev_stmt_info
= NULL
;
3531 for (j
= 0; j
< ncopies
; ++j
)
3533 /* Build argument list for the vectorized call. */
3535 vargs
.create (nargs
);
3539 for (i
= 0; i
< nargs
; i
++)
3541 unsigned int k
, l
, m
, o
;
3543 op
= gimple_call_arg (stmt
, i
);
3544 switch (bestn
->simdclone
->args
[i
].arg_type
)
3546 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3547 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3548 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3549 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3551 if (TYPE_VECTOR_SUBPARTS (atype
)
3552 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3554 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3555 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3556 / TYPE_VECTOR_SUBPARTS (atype
));
3557 gcc_assert ((k
& (k
- 1)) == 0);
3560 = vect_get_vec_def_for_operand (op
, stmt
);
3563 vec_oprnd0
= arginfo
[i
].op
;
3564 if ((m
& (k
- 1)) == 0)
3566 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3569 arginfo
[i
].op
= vec_oprnd0
;
3571 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3573 bitsize_int ((m
& (k
- 1)) * prec
));
3575 = gimple_build_assign (make_ssa_name (atype
),
3577 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3578 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3582 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3583 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3584 gcc_assert ((k
& (k
- 1)) == 0);
3585 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3587 vec_alloc (ctor_elts
, k
);
3590 for (l
= 0; l
< k
; l
++)
3592 if (m
== 0 && l
== 0)
3594 = vect_get_vec_def_for_operand (op
, stmt
);
3597 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3599 arginfo
[i
].op
= vec_oprnd0
;
3602 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3606 vargs
.safe_push (vec_oprnd0
);
3609 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3611 = gimple_build_assign (make_ssa_name (atype
),
3613 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3614 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3619 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3620 vargs
.safe_push (op
);
3622 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3623 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3628 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3633 edge pe
= loop_preheader_edge (loop
);
3634 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3635 gcc_assert (!new_bb
);
3637 if (arginfo
[i
].simd_lane_linear
)
3639 vargs
.safe_push (arginfo
[i
].op
);
3642 tree phi_res
= copy_ssa_name (op
);
3643 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3644 set_vinfo_for_stmt (new_phi
,
3645 new_stmt_vec_info (new_phi
, loop_vinfo
));
3646 add_phi_arg (new_phi
, arginfo
[i
].op
,
3647 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3649 = POINTER_TYPE_P (TREE_TYPE (op
))
3650 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3651 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3652 ? sizetype
: TREE_TYPE (op
);
3654 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3656 tree tcst
= wide_int_to_tree (type
, cst
);
3657 tree phi_arg
= copy_ssa_name (op
);
3659 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3660 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3661 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3662 set_vinfo_for_stmt (new_stmt
,
3663 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3664 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3666 arginfo
[i
].op
= phi_res
;
3667 vargs
.safe_push (phi_res
);
3672 = POINTER_TYPE_P (TREE_TYPE (op
))
3673 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3674 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3675 ? sizetype
: TREE_TYPE (op
);
3677 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3679 tree tcst
= wide_int_to_tree (type
, cst
);
3680 new_temp
= make_ssa_name (TREE_TYPE (op
));
3681 new_stmt
= gimple_build_assign (new_temp
, code
,
3682 arginfo
[i
].op
, tcst
);
3683 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3684 vargs
.safe_push (new_temp
);
3687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3698 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3701 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3703 new_temp
= create_tmp_var (ratype
);
3704 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3705 == TYPE_VECTOR_SUBPARTS (rtype
))
3706 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3708 new_temp
= make_ssa_name (rtype
, new_stmt
);
3709 gimple_call_set_lhs (new_stmt
, new_temp
);
3711 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3715 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3718 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3719 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3720 gcc_assert ((k
& (k
- 1)) == 0);
3721 for (l
= 0; l
< k
; l
++)
3726 t
= build_fold_addr_expr (new_temp
);
3727 t
= build2 (MEM_REF
, vectype
, t
,
3728 build_int_cst (TREE_TYPE (t
),
3729 l
* prec
/ BITS_PER_UNIT
));
3732 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3733 bitsize_int (prec
), bitsize_int (l
* prec
));
3735 = gimple_build_assign (make_ssa_name (vectype
), t
);
3736 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3737 if (j
== 0 && l
== 0)
3738 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3740 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3742 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3747 tree clobber
= build_constructor (ratype
, NULL
);
3748 TREE_THIS_VOLATILE (clobber
) = 1;
3749 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3750 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3754 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3756 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3757 / TYPE_VECTOR_SUBPARTS (rtype
));
3758 gcc_assert ((k
& (k
- 1)) == 0);
3759 if ((j
& (k
- 1)) == 0)
3760 vec_alloc (ret_ctor_elts
, k
);
3763 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3764 for (m
= 0; m
< o
; m
++)
3766 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3767 size_int (m
), NULL_TREE
, NULL_TREE
);
3769 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3770 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3771 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3772 gimple_assign_lhs (new_stmt
));
3774 tree clobber
= build_constructor (ratype
, NULL
);
3775 TREE_THIS_VOLATILE (clobber
) = 1;
3776 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3777 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3780 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3781 if ((j
& (k
- 1)) != k
- 1)
3783 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3785 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3786 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3788 if ((unsigned) j
== k
- 1)
3789 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3793 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3798 tree t
= build_fold_addr_expr (new_temp
);
3799 t
= build2 (MEM_REF
, vectype
, t
,
3800 build_int_cst (TREE_TYPE (t
), 0));
3802 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3803 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3804 tree clobber
= build_constructor (ratype
, NULL
);
3805 TREE_THIS_VOLATILE (clobber
) = 1;
3806 vect_finish_stmt_generation (stmt
,
3807 gimple_build_assign (new_temp
,
3813 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3815 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3817 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3822 /* The call in STMT might prevent it from being removed in dce.
3823 We however cannot remove it here, due to the way the ssa name
3824 it defines is mapped to the new definition. So just replace
3825 rhs of the statement with something harmless. */
3832 type
= TREE_TYPE (scalar_dest
);
3833 if (is_pattern_stmt_p (stmt_info
))
3834 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3836 lhs
= gimple_call_lhs (stmt
);
3837 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3840 new_stmt
= gimple_build_nop ();
3841 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3842 set_vinfo_for_stmt (stmt
, NULL
);
3843 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3844 gsi_replace (gsi
, new_stmt
, true);
3845 unlink_stmt_vdef (stmt
);
3851 /* Function vect_gen_widened_results_half
3853 Create a vector stmt whose code, type, number of arguments, and result
3854 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3855 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3856 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3857 needs to be created (DECL is a function-decl of a target-builtin).
3858 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): this region appears to be a line-mangled extraction of GCC's
   tree-vect-stmts.c -- tokens are split across lines and several original
   lines (the return type, the DECL and STMT parameters, braces, local
   declarations and the final "return new_stmt;") seem to be missing.
   Verify against the upstream file before editing.  */
3861 vect_gen_widened_results_half (enum tree_code code
,
3863 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3864 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3870 /* Generate half of the widened result: */
/* CALL_EXPR case: the widening operation is implemented by a target
   builtin; build a GIMPLE call to DECL (two arguments for binary_op,
   otherwise one) and give it a fresh SSA name based on VEC_DEST.  */
3871 if (code
== CALL_EXPR
)
3873 /* Target specific support */
3874 if (op_type
== binary_op
)
3875 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3877 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3878 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3879 gimple_call_set_lhs (new_stmt
, new_temp
);
/* Generic case: emit VEC_DEST = CODE <VEC_OPRND0 [, VEC_OPRND1]> as a
   plain assignment; OP_TYPE must match the arity of CODE (asserted).  */
3883 /* Generic support */
3884 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3885 if (op_type
!= binary_op
)
3887 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3888 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3889 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Materialize the new vector statement in the instruction stream at GSI.  */
3891 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3897 /* Get vectorized definitions for loop-based vectorization. For the first
3898 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3899 scalar operand), and for the rest we get a copy with
3900 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3901 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3902 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): line-mangled extraction; the function's braces, the local
   declaration of "vec_oprnd" and the write-back of the latest def into
   *OPRND appear to have been dropped -- confirm against upstream
   tree-vect-stmts.c before editing.  */
3905 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3906 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3910 /* Get first vector operand. */
3911 /* All the vector operands except the very first one (that is scalar oprnd)
/* If *OPRND still has scalar type this is the first call: create the
   initial vector def from the scalar operand; otherwise *OPRND already
   holds the previous vector def, so take a copy of it.  */
3913 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3914 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3916 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3918 vec_oprnds
->quick_push (vec_oprnd
);
3920 /* Get second vector operand. */
3921 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3922 vec_oprnds
->quick_push (vec_oprnd
);
3926 /* For conversion in multiple steps, continue to get operands
/* Recurse with MULTI_STEP_CVT - 1, consuming one conversion step per
   level; presumably guarded by a MULTI_STEP_CVT check on a dropped
   line -- TODO confirm against the upstream source.  */
3929 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3933 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3934 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): line-mangled extraction; the VEC_DSTS parameter line,
   braces, "else" keywords and some control-flow lines appear to be
   missing -- confirm against upstream tree-vect-stmts.c before editing.  */
3938 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3939 int multi_step_cvt
, gimple
*stmt
,
3941 gimple_stmt_iterator
*gsi
,
3942 slp_tree slp_node
, enum tree_code code
,
3943 stmt_vec_info
*prev_stmt_info
)
3946 tree vop0
, vop1
, new_tmp
, vec_dest
;
3948 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3950 vec_dest
= vec_dsts
.pop ();
/* Each demotion statement packs a PAIR of input vectors (elements i and
   i+1 of VEC_OPRNDS) into a single narrower result vector.  */
3952 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3954 /* Create demotion operation. */
3955 vop0
= (*vec_oprnds
)[i
];
3956 vop1
= (*vec_oprnds
)[i
+ 1];
3957 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3958 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3959 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3963 /* Store the resulting vector for next recursive call. */
3964 (*vec_oprnds
)[i
/2] = new_tmp
;
3967 /* This is the last step of the conversion sequence. Store the
3968 vectors in SLP_NODE or in vector info of the scalar statement
3969 (or in STMT_VINFO_RELATED_STMT chain). */
3971 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Non-SLP (loop) case: chain the generated vector stmts off the scalar
   stmt's vec-info via STMT_VINFO_RELATED_STMT.  */
3974 if (!*prev_stmt_info
)
3975 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3977 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3979 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3984 /* For multi-step demotion operations we first generate demotion operations
3985 from the source type to the intermediate types, and then combine the
3986 results (stored in VEC_OPRNDS) in demotion operation to the destination
3990 /* At each level of recursion we have half of the operands we had at the
/* Keep only the (i+1)/2 results just produced, then recurse one step
   further down the demotion chain using VEC_PACK_TRUNC_EXPR.  */
3992 vec_oprnds
->truncate ((i
+1)/2);
3993 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3994 stmt
, vec_dsts
, gsi
, slp_node
,
3995 VEC_PACK_TRUNC_EXPR
,
/* Push the popped destination back onto VEC_DSTS -- presumably so the
   caller sees the vector unchanged; TODO confirm against upstream.  */
3999 vec_dsts
.quick_push (vec_dest
);
4003 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4004 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4005 the resulting vectors and call the function recursively. */
/* NOTE(review): line-mangled extraction; braces, the loop index
   declaration and some lines (e.g. a unary-op "vop1 = NULL" branch)
   appear to be missing -- confirm against upstream tree-vect-stmts.c
   before editing.  */
4008 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4009 vec
<tree
> *vec_oprnds1
,
4010 gimple
*stmt
, tree vec_dest
,
4011 gimple_stmt_iterator
*gsi
,
4012 enum tree_code code1
,
4013 enum tree_code code2
, tree decl1
,
4014 tree decl2
, int op_type
)
4017 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4018 gimple
*new_stmt1
, *new_stmt2
;
4019 vec
<tree
> vec_tmp
= vNULL
;
/* Every input vector yields TWO widened result halves (one via CODE1,
   one via CODE2), hence VEC_TMP is sized at twice the input length.  */
4021 vec_tmp
.create (vec_oprnds0
->length () * 2);
4022 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4024 if (op_type
== binary_op
)
4025 vop1
= (*vec_oprnds1
)[i
];
4029 /* Generate the two halves of promotion operation. */
4030 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4031 op_type
, vec_dest
, gsi
, stmt
);
4032 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4033 op_type
, vec_dest
, gsi
, stmt
);
/* The halves were emitted either as calls to a target builtin or as
   plain assignments; fetch the produced lhs accordingly.  */
4034 if (is_gimple_call (new_stmt1
))
4036 new_tmp1
= gimple_call_lhs (new_stmt1
);
4037 new_tmp2
= gimple_call_lhs (new_stmt2
);
4041 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4042 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4045 /* Store the results for the next step. */
4046 vec_tmp
.quick_push (new_tmp1
);
4047 vec_tmp
.quick_push (new_tmp2
);
/* Replace the caller's operand vector with the widened results,
   releasing the old vector's storage first.  */
4050 vec_oprnds0
->release ();
4051 *vec_oprnds0
= vec_tmp
;
4055 /* Check if STMT performs a conversion operation, that can be vectorized.
4056 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4057 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4058 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4061 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4062 gimple
**vec_stmt
, slp_tree slp_node
)
4066 tree op0
, op1
= NULL_TREE
;
4067 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4068 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4069 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4070 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4071 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4072 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4075 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4077 gimple
*new_stmt
= NULL
;
4078 stmt_vec_info prev_stmt_info
;
4081 tree vectype_out
, vectype_in
;
4083 tree lhs_type
, rhs_type
;
4084 enum { NARROW
, NONE
, WIDEN
} modifier
;
4085 vec
<tree
> vec_oprnds0
= vNULL
;
4086 vec
<tree
> vec_oprnds1
= vNULL
;
4088 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4089 vec_info
*vinfo
= stmt_info
->vinfo
;
4090 int multi_step_cvt
= 0;
4091 vec
<tree
> interm_types
= vNULL
;
4092 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4094 unsigned short fltsz
;
4096 /* Is STMT a vectorizable conversion? */
4098 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4101 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4105 if (!is_gimple_assign (stmt
))
4108 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4111 code
= gimple_assign_rhs_code (stmt
);
4112 if (!CONVERT_EXPR_CODE_P (code
)
4113 && code
!= FIX_TRUNC_EXPR
4114 && code
!= FLOAT_EXPR
4115 && code
!= WIDEN_MULT_EXPR
4116 && code
!= WIDEN_LSHIFT_EXPR
)
4119 op_type
= TREE_CODE_LENGTH (code
);
4121 /* Check types of lhs and rhs. */
4122 scalar_dest
= gimple_assign_lhs (stmt
);
4123 lhs_type
= TREE_TYPE (scalar_dest
);
4124 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4126 op0
= gimple_assign_rhs1 (stmt
);
4127 rhs_type
= TREE_TYPE (op0
);
4129 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4130 && !((INTEGRAL_TYPE_P (lhs_type
)
4131 && INTEGRAL_TYPE_P (rhs_type
))
4132 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4133 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4136 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4137 && ((INTEGRAL_TYPE_P (lhs_type
)
4138 && !type_has_mode_precision_p (lhs_type
))
4139 || (INTEGRAL_TYPE_P (rhs_type
)
4140 && !type_has_mode_precision_p (rhs_type
))))
4142 if (dump_enabled_p ())
4143 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4144 "type conversion to/from bit-precision unsupported."
4149 /* Check the operands of the operation. */
4150 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4154 "use not simple.\n");
4157 if (op_type
== binary_op
)
4161 op1
= gimple_assign_rhs2 (stmt
);
4162 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4163 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4165 if (CONSTANT_CLASS_P (op0
))
4166 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4168 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4172 if (dump_enabled_p ())
4173 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4174 "use not simple.\n");
4179 /* If op0 is an external or constant defs use a vector type of
4180 the same size as the output vector type. */
4182 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4184 gcc_assert (vectype_in
);
4187 if (dump_enabled_p ())
4189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4190 "no vectype for scalar type ");
4191 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4192 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4198 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4199 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4201 if (dump_enabled_p ())
4203 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4204 "can't convert between boolean and non "
4206 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4207 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4213 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4214 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4215 if (nunits_in
< nunits_out
)
4217 else if (nunits_out
== nunits_in
)
4222 /* Multiple types in SLP are handled by creating the appropriate number of
4223 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4227 else if (modifier
== NARROW
)
4228 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4230 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4232 /* Sanity check: make sure that at least one copy of the vectorized stmt
4233 needs to be generated. */
4234 gcc_assert (ncopies
>= 1);
4236 bool found_mode
= false;
4237 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4238 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4239 opt_scalar_mode rhs_mode_iter
;
4241 /* Supportable by target? */
4245 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4247 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4254 "conversion not supported by target.\n");
4258 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4259 &code1
, &code2
, &multi_step_cvt
,
4262 /* Binary widening operation can only be supported directly by the
4264 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4268 if (code
!= FLOAT_EXPR
4269 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4272 fltsz
= GET_MODE_SIZE (lhs_mode
);
4273 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4275 rhs_mode
= rhs_mode_iter
.require ();
4276 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4280 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4281 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4282 if (cvt_type
== NULL_TREE
)
4285 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4287 if (!supportable_convert_operation (code
, vectype_out
,
4288 cvt_type
, &decl1
, &codecvt1
))
4291 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4292 cvt_type
, &codecvt1
,
4293 &codecvt2
, &multi_step_cvt
,
4297 gcc_assert (multi_step_cvt
== 0);
4299 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4300 vectype_in
, &code1
, &code2
,
4301 &multi_step_cvt
, &interm_types
))
4311 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4312 codecvt2
= ERROR_MARK
;
4316 interm_types
.safe_push (cvt_type
);
4317 cvt_type
= NULL_TREE
;
4322 gcc_assert (op_type
== unary_op
);
4323 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4324 &code1
, &multi_step_cvt
,
4328 if (code
!= FIX_TRUNC_EXPR
4329 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4333 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4334 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4335 if (cvt_type
== NULL_TREE
)
4337 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4340 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4341 &code1
, &multi_step_cvt
,
4350 if (!vec_stmt
) /* transformation not required. */
4352 if (dump_enabled_p ())
4353 dump_printf_loc (MSG_NOTE
, vect_location
,
4354 "=== vectorizable_conversion ===\n");
4355 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4357 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4358 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4360 else if (modifier
== NARROW
)
4362 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4363 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4367 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4368 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4370 interm_types
.release ();
4375 if (dump_enabled_p ())
4376 dump_printf_loc (MSG_NOTE
, vect_location
,
4377 "transform conversion. ncopies = %d.\n", ncopies
);
4379 if (op_type
== binary_op
)
4381 if (CONSTANT_CLASS_P (op0
))
4382 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4383 else if (CONSTANT_CLASS_P (op1
))
4384 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4387 /* In case of multi-step conversion, we first generate conversion operations
4388 to the intermediate types, and then from that types to the final one.
4389 We create vector destinations for the intermediate type (TYPES) received
4390 from supportable_*_operation, and store them in the correct order
4391 for future use in vect_create_vectorized_*_stmts (). */
4392 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4393 vec_dest
= vect_create_destination_var (scalar_dest
,
4394 (cvt_type
&& modifier
== WIDEN
)
4395 ? cvt_type
: vectype_out
);
4396 vec_dsts
.quick_push (vec_dest
);
4400 for (i
= interm_types
.length () - 1;
4401 interm_types
.iterate (i
, &intermediate_type
); i
--)
4403 vec_dest
= vect_create_destination_var (scalar_dest
,
4405 vec_dsts
.quick_push (vec_dest
);
4410 vec_dest
= vect_create_destination_var (scalar_dest
,
4412 ? vectype_out
: cvt_type
);
4416 if (modifier
== WIDEN
)
4418 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4419 if (op_type
== binary_op
)
4420 vec_oprnds1
.create (1);
4422 else if (modifier
== NARROW
)
4423 vec_oprnds0
.create (
4424 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4426 else if (code
== WIDEN_LSHIFT_EXPR
)
4427 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4430 prev_stmt_info
= NULL
;
4434 for (j
= 0; j
< ncopies
; j
++)
4437 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4439 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4441 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4443 /* Arguments are ready, create the new vector stmt. */
4444 if (code1
== CALL_EXPR
)
4446 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4447 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4448 gimple_call_set_lhs (new_stmt
, new_temp
);
4452 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4453 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4454 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4455 gimple_assign_set_lhs (new_stmt
, new_temp
);
4458 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4460 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4463 if (!prev_stmt_info
)
4464 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4466 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4467 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4474 /* In case the vectorization factor (VF) is bigger than the number
4475 of elements that we can fit in a vectype (nunits), we have to
4476 generate more than one vector stmt - i.e - we need to "unroll"
4477 the vector stmt by a factor VF/nunits. */
4478 for (j
= 0; j
< ncopies
; j
++)
4485 if (code
== WIDEN_LSHIFT_EXPR
)
4490 /* Store vec_oprnd1 for every vector stmt to be created
4491 for SLP_NODE. We check during the analysis that all
4492 the shift arguments are the same. */
4493 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4494 vec_oprnds1
.quick_push (vec_oprnd1
);
4496 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4500 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4501 &vec_oprnds1
, slp_node
);
4505 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4506 vec_oprnds0
.quick_push (vec_oprnd0
);
4507 if (op_type
== binary_op
)
4509 if (code
== WIDEN_LSHIFT_EXPR
)
4512 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4513 vec_oprnds1
.quick_push (vec_oprnd1
);
4519 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4520 vec_oprnds0
.truncate (0);
4521 vec_oprnds0
.quick_push (vec_oprnd0
);
4522 if (op_type
== binary_op
)
4524 if (code
== WIDEN_LSHIFT_EXPR
)
4527 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4529 vec_oprnds1
.truncate (0);
4530 vec_oprnds1
.quick_push (vec_oprnd1
);
4534 /* Arguments are ready. Create the new vector stmts. */
4535 for (i
= multi_step_cvt
; i
>= 0; i
--)
4537 tree this_dest
= vec_dsts
[i
];
4538 enum tree_code c1
= code1
, c2
= code2
;
4539 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4544 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4546 stmt
, this_dest
, gsi
,
4547 c1
, c2
, decl1
, decl2
,
4551 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4555 if (codecvt1
== CALL_EXPR
)
4557 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4558 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4559 gimple_call_set_lhs (new_stmt
, new_temp
);
4563 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4564 new_temp
= make_ssa_name (vec_dest
);
4565 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4572 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4575 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4578 if (!prev_stmt_info
)
4579 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4581 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4582 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4587 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4591 /* In case the vectorization factor (VF) is bigger than the number
4592 of elements that we can fit in a vectype (nunits), we have to
4593 generate more than one vector stmt - i.e - we need to "unroll"
4594 the vector stmt by a factor VF/nunits. */
4595 for (j
= 0; j
< ncopies
; j
++)
4599 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4603 vec_oprnds0
.truncate (0);
4604 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4605 vect_pow2 (multi_step_cvt
) - 1);
4608 /* Arguments are ready. Create the new vector stmts. */
4610 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4612 if (codecvt1
== CALL_EXPR
)
4614 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4615 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4616 gimple_call_set_lhs (new_stmt
, new_temp
);
4620 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4621 new_temp
= make_ssa_name (vec_dest
);
4622 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4626 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4627 vec_oprnds0
[i
] = new_temp
;
4630 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4631 stmt
, vec_dsts
, gsi
,
4636 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4640 vec_oprnds0
.release ();
4641 vec_oprnds1
.release ();
4642 interm_types
.release ();
4648 /* Function vectorizable_assignment.
4650 Check if STMT performs an assignment (copy) that can be vectorized.
4651 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4652 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4653 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4656 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4657 gimple
**vec_stmt
, slp_tree slp_node
)
4662 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4663 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4666 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
4670 vec
<tree
> vec_oprnds
= vNULL
;
4672 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4673 vec_info
*vinfo
= stmt_info
->vinfo
;
4674 gimple
*new_stmt
= NULL
;
4675 stmt_vec_info prev_stmt_info
= NULL
;
4676 enum tree_code code
;
4679 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4682 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4686 /* Is vectorizable assignment? */
4687 if (!is_gimple_assign (stmt
))
4690 scalar_dest
= gimple_assign_lhs (stmt
);
4691 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4694 code
= gimple_assign_rhs_code (stmt
);
4695 if (gimple_assign_single_p (stmt
)
4696 || code
== PAREN_EXPR
4697 || CONVERT_EXPR_CODE_P (code
))
4698 op
= gimple_assign_rhs1 (stmt
);
4702 if (code
== VIEW_CONVERT_EXPR
)
4703 op
= TREE_OPERAND (op
, 0);
4705 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4706 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4708 /* Multiple types in SLP are handled by creating the appropriate number of
4709 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4714 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4716 gcc_assert (ncopies
>= 1);
4718 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4720 if (dump_enabled_p ())
4721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4722 "use not simple.\n");
4726 /* We can handle NOP_EXPR conversions that do not change the number
4727 of elements or the vector size. */
4728 if ((CONVERT_EXPR_CODE_P (code
)
4729 || code
== VIEW_CONVERT_EXPR
)
4731 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4732 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4733 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4736 /* We do not handle bit-precision changes. */
4737 if ((CONVERT_EXPR_CODE_P (code
)
4738 || code
== VIEW_CONVERT_EXPR
)
4739 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4740 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
4741 || !type_has_mode_precision_p (TREE_TYPE (op
)))
4742 /* But a conversion that does not change the bit-pattern is ok. */
4743 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4744 > TYPE_PRECISION (TREE_TYPE (op
)))
4745 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4746 /* Conversion between boolean types of different sizes is
4747 a simple assignment in case their vectypes are same
4749 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4750 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4754 "type conversion to/from bit-precision "
4759 if (!vec_stmt
) /* transformation not required. */
4761 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_NOTE
, vect_location
,
4764 "=== vectorizable_assignment ===\n");
4765 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4770 if (dump_enabled_p ())
4771 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4774 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4777 for (j
= 0; j
< ncopies
; j
++)
4781 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
4783 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4785 /* Arguments are ready. create the new vector stmt. */
4786 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4788 if (CONVERT_EXPR_CODE_P (code
)
4789 || code
== VIEW_CONVERT_EXPR
)
4790 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4791 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4792 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4793 gimple_assign_set_lhs (new_stmt
, new_temp
);
4794 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4796 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4803 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4805 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4807 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4810 vec_oprnds
.release ();
4815 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4816 either as shift by a scalar or by a vector. */
/* NOTE(review): damaged extraction — only comments added; code tokens are
   byte-identical to the mangled original.

   Queries whether the target supports shift/rotate CODE on vectors of
   SCALAR_TYPE, trying the vector-by-scalar optab first and falling back
   to the vector-by-vector optab.  The return statements were dropped by
   the extraction — presumably returns a bool; confirm against the
   unmangled file.  */
4819 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4822 machine_mode vec_mode
;
4827 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* First choice: shift of a vector by a scalar amount.  */
4831 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4833 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* Fallback: shift of a vector by a vector of amounts.  */
4835 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4837 || (optab_handler (optab
, TYPE_MODE (vectype
))
4838 == CODE_FOR_nothing
))
4842 vec_mode
= TYPE_MODE (vectype
);
4843 icode
= (int) optab_handler (optab
, vec_mode
);
4844 if (icode
== CODE_FOR_nothing
)
4851 /* Function vectorizable_shift.
4853 Check if STMT performs a shift operation that can be vectorized.
4854 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4855 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4856 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): damaged extraction — statement fragments are split across
   lines and many original lines (returns, braces, else-arms, some operands)
   are missing.  Only comments were added; code tokens are untouched.

   Checks whether STMT is a vectorizable shift/rotate and, when VEC_STMT is
   non-null, emits the vectorized form.  Distinguishes a scalar shift amount
   (vector-by-scalar optab) from a vector shift amount (vector-by-vector
   optab).  */
4859 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4860 gimple
**vec_stmt
, slp_tree slp_node
)
4864 tree op0
, op1
= NULL
;
4865 tree vec_oprnd1
= NULL_TREE
;
4866 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4868 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4869 enum tree_code code
;
4870 machine_mode vec_mode
;
4874 machine_mode optab_op2_mode
;
4876 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4878 gimple
*new_stmt
= NULL
;
4879 stmt_vec_info prev_stmt_info
;
4886 vec
<tree
> vec_oprnds0
= vNULL
;
4887 vec
<tree
> vec_oprnds1
= vNULL
;
4890 bool scalar_shift_arg
= true;
4891 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4892 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Guard clauses: only relevant, internal-def GIMPLE assignments whose
   rhs code is one of the four shift/rotate codes are handled.  */
4894 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4897 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4901 /* Is STMT a vectorizable binary/unary operation? */
4902 if (!is_gimple_assign (stmt
))
4905 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4908 code
= gimple_assign_rhs_code (stmt
);
4910 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4911 || code
== RROTATE_EXPR
))
4914 scalar_dest
= gimple_assign_lhs (stmt
);
4915 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4916 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
4918 if (dump_enabled_p ())
4919 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4920 "bit-precision shifts not supported.\n");
4924 op0
= gimple_assign_rhs1 (stmt
);
4925 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4927 if (dump_enabled_p ())
4928 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4929 "use not simple.\n");
4932 /* If op0 is an external or constant def use a vector type with
4933 the same size as the output vector type. */
4935 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4937 gcc_assert (vectype
);
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4942 "no vectype for scalar type\n");
4946 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4947 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4948 if (nunits_out
!= nunits_in
)
4951 op1
= gimple_assign_rhs2 (stmt
);
4952 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4954 if (dump_enabled_p ())
4955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4956 "use not simple.\n");
4960 /* Multiple types in SLP are handled by creating the appropriate number of
4961 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4966 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
4968 gcc_assert (ncopies
>= 1);
4970 /* Determine whether the shift amount is a vector, or scalar. If the
4971 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4973 if ((dt
[1] == vect_internal_def
4974 || dt
[1] == vect_induction_def
)
4976 scalar_shift_arg
= false;
4977 else if (dt
[1] == vect_constant_def
4978 || dt
[1] == vect_external_def
4979 || dt
[1] == vect_internal_def
)
4981 /* In SLP, need to check whether the shift count is the same,
4982 in loops if it is a constant or invariant, it is always
4986 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4989 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4990 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4991 scalar_shift_arg
= false;
4994 /* If the shift amount is computed by a pattern stmt we cannot
4995 use the scalar amount directly thus give up and use a vector
4997 if (dt
[1] == vect_internal_def
)
4999 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
5000 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
5001 scalar_shift_arg
= false;
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5008 "operand mode requires invariant argument.\n");
5012 /* Vector shifted by vector. */
5013 if (!scalar_shift_arg
)
5015 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_NOTE
, vect_location
,
5018 "vector/vector shift/rotate found.\n");
5021 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5022 if (op1_vectype
== NULL_TREE
5023 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5027 "unusable type for last operand in"
5028 " vector/vector shift/rotate.\n");
5032 /* See if the machine has a vector shifted by scalar insn and if not
5033 then see if it has a vector shifted by vector insn. */
5036 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5038 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5040 if (dump_enabled_p ())
5041 dump_printf_loc (MSG_NOTE
, vect_location
,
5042 "vector/scalar shift/rotate found.\n");
5046 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5048 && (optab_handler (optab
, TYPE_MODE (vectype
))
5049 != CODE_FOR_nothing
))
5051 scalar_shift_arg
= false;
5053 if (dump_enabled_p ())
5054 dump_printf_loc (MSG_NOTE
, vect_location
,
5055 "vector/vector shift/rotate found.\n");
5057 /* Unlike the other binary operators, shifts/rotates have
5058 the rhs being int, instead of the same type as the lhs,
5059 so make sure the scalar is the right type if we are
5060 dealing with vectors of long long/long/short/char. */
5061 if (dt
[1] == vect_constant_def
)
5062 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5063 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5067 && TYPE_MODE (TREE_TYPE (vectype
))
5068 != TYPE_MODE (TREE_TYPE (op1
)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5072 "unusable type for last operand in"
5073 " vector/vector shift/rotate.\n");
5076 if (vec_stmt
&& !slp_node
)
5078 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5079 op1
= vect_init_vector (stmt
, op1
,
5080 TREE_TYPE (vectype
), NULL
);
5087 /* Supportable by target? */
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5095 vec_mode
= TYPE_MODE (vectype
);
5096 icode
= (int) optab_handler (optab
, vec_mode
);
5097 if (icode
== CODE_FOR_nothing
)
5099 if (dump_enabled_p ())
5100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5101 "op not supported by target.\n");
5102 /* Check only during analysis. */
5103 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5105 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5107 if (dump_enabled_p ())
5108 dump_printf_loc (MSG_NOTE
, vect_location
,
5109 "proceeding using word mode.\n");
5112 /* Worthwhile without SIMD support? Check only during analysis. */
5114 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5115 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5117 if (dump_enabled_p ())
5118 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5119 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record stmt type and cost, do not transform.  */
5123 if (!vec_stmt
) /* transformation not required. */
5125 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE
, vect_location
,
5128 "=== vectorizable_shift ===\n");
5129 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transformation: build NCOPIES vector shift stmts.  */
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_NOTE
, vect_location
,
5137 "transform binary/unary operation.\n");
5140 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5142 prev_stmt_info
= NULL
;
5143 for (j
= 0; j
< ncopies
; j
++)
5148 if (scalar_shift_arg
)
5150 /* Vector shl and shr insn patterns can be defined with scalar
5151 operand 2 (shift operand). In this case, use constant or loop
5152 invariant op1 directly, without extending it to vector mode
5154 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5155 if (!VECTOR_MODE_P (optab_op2_mode
))
5157 if (dump_enabled_p ())
5158 dump_printf_loc (MSG_NOTE
, vect_location
,
5159 "operand 1 using scalar mode.\n");
5161 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5162 vec_oprnds1
.quick_push (vec_oprnd1
);
5165 /* Store vec_oprnd1 for every vector stmt to be created
5166 for SLP_NODE. We check during the analysis that all
5167 the shift arguments are the same.
5168 TODO: Allow different constants for different vector
5169 stmts generated for an SLP instance. */
5170 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5171 vec_oprnds1
.quick_push (vec_oprnd1
);
5176 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5177 (a special case for certain kind of vector shifts); otherwise,
5178 operand 1 should be of a vector type (the usual case). */
5180 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5183 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5187 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5189 /* Arguments are ready. Create the new vector stmt. */
5190 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5192 vop1
= vec_oprnds1
[i
];
5193 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5194 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5195 gimple_assign_set_lhs (new_stmt
, new_temp
);
5196 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5198 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies through STMT_VINFO_RELATED_STMT.  */
5205 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5207 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5208 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5211 vec_oprnds0
.release ();
5212 vec_oprnds1
.release ();
5218 /* Function vectorizable_operation.
5220 Check if STMT performs a binary, unary or ternary operation that can
5222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5223 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): damaged extraction — statement fragments are split across
   lines and many original lines are missing.  Only comments were added;
   code tokens are untouched.

   Checks whether STMT is a vectorizable unary/binary/ternary operation
   and, when VEC_STMT is non-null, emits the vectorized stmts.  Shifts are
   explicitly excluded (handled by vectorizable_shift).  */
5227 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5228 gimple
**vec_stmt
, slp_tree slp_node
)
5232 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5233 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5235 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5236 enum tree_code code
, orig_code
;
5237 machine_mode vec_mode
;
5241 bool target_support_p
;
5243 enum vect_def_type dt
[3]
5244 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5246 gimple
*new_stmt
= NULL
;
5247 stmt_vec_info prev_stmt_info
;
5253 vec
<tree
> vec_oprnds0
= vNULL
;
5254 vec
<tree
> vec_oprnds1
= vNULL
;
5255 vec
<tree
> vec_oprnds2
= vNULL
;
5256 tree vop0
, vop1
, vop2
;
5257 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5258 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Guard clauses: only relevant, internal-def GIMPLE assignments with an
   SSA_NAME lhs are considered.  */
5260 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5263 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5267 /* Is STMT a vectorizable binary/unary operation? */
5268 if (!is_gimple_assign (stmt
))
5271 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5274 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5276 /* For pointer addition and subtraction, we should use the normal
5277 plus and minus for the vector operation. */
5278 if (code
== POINTER_PLUS_EXPR
)
5280 if (code
== POINTER_DIFF_EXPR
)
5283 /* Support only unary or binary operations. */
5284 op_type
= TREE_CODE_LENGTH (code
);
5285 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5287 if (dump_enabled_p ())
5288 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5289 "num. args = %d (not unary/binary/ternary op).\n",
5294 scalar_dest
= gimple_assign_lhs (stmt
);
5295 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5297 /* Most operations cannot handle bit-precision types without extra
5299 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5300 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5301 /* Exception are bitwise binary operations. */
5302 && code
!= BIT_IOR_EXPR
5303 && code
!= BIT_XOR_EXPR
5304 && code
!= BIT_AND_EXPR
)
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5308 "bit-precision arithmetic not supported.\n");
5312 op0
= gimple_assign_rhs1 (stmt
);
5313 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5315 if (dump_enabled_p ())
5316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5317 "use not simple.\n");
5320 /* If op0 is an external or constant def use a vector type with
5321 the same size as the output vector type. */
5324 /* For boolean type we cannot determine vectype by
5325 invariant value (don't know whether it is a vector
5326 of booleans or vector of integers). We use output
5327 vectype because operations on boolean don't change
5329 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5331 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5333 if (dump_enabled_p ())
5334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5335 "not supported operation on bool value.\n");
5338 vectype
= vectype_out
;
5341 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5344 gcc_assert (vectype
);
5347 if (dump_enabled_p ())
5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5350 "no vectype for scalar type ");
5351 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5353 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5359 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5360 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5361 if (nunits_out
!= nunits_in
)
/* Validate the remaining operands for binary/ternary forms.  */
5364 if (op_type
== binary_op
|| op_type
== ternary_op
)
5366 op1
= gimple_assign_rhs2 (stmt
);
5367 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5369 if (dump_enabled_p ())
5370 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5371 "use not simple.\n");
5375 if (op_type
== ternary_op
)
5377 op2
= gimple_assign_rhs3 (stmt
);
5378 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5382 "use not simple.\n");
5387 /* Multiple types in SLP are handled by creating the appropriate number of
5388 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5393 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5395 gcc_assert (ncopies
>= 1);
5397 /* Shifts are handled in vectorizable_shift (). */
5398 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5399 || code
== RROTATE_EXPR
)
5402 /* Supportable by target? */
5404 vec_mode
= TYPE_MODE (vectype
);
5405 if (code
== MULT_HIGHPART_EXPR
)
5406 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5409 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5412 if (dump_enabled_p ())
5413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5417 target_support_p
= (optab_handler (optab
, vec_mode
)
5418 != CODE_FOR_nothing
);
5421 if (!target_support_p
)
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5425 "op not supported by target.\n");
5426 /* Check only during analysis. */
5427 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5428 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5430 if (dump_enabled_p ())
5431 dump_printf_loc (MSG_NOTE
, vect_location
,
5432 "proceeding using word mode.\n");
5435 /* Worthwhile without SIMD support? Check only during analysis. */
5436 if (!VECTOR_MODE_P (vec_mode
)
5438 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5440 if (dump_enabled_p ())
5441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5442 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record stmt type and cost, do not transform.  */
5446 if (!vec_stmt
) /* transformation not required. */
5448 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE
, vect_location
,
5451 "=== vectorizable_operation ===\n");
5452 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transformation path.  */
5458 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_NOTE
, vect_location
,
5460 "transform binary/unary operation.\n");
5463 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5465 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5466 vectors with unsigned elements, but the result is signed. So, we
5467 need to compute the MINUS_EXPR into vectype temporary and
5468 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5469 tree vec_cvt_dest
= NULL_TREE
;
5470 if (orig_code
== POINTER_DIFF_EXPR
)
5471 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5473 /* In case the vectorization factor (VF) is bigger than the number
5474 of elements that we can fit in a vectype (nunits), we have to generate
5475 more than one vector stmt - i.e - we need to "unroll" the
5476 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5477 from one copy of the vector stmt to the next, in the field
5478 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5479 stages to find the correct vector defs to be used when vectorizing
5480 stmts that use the defs of the current stmt. The example below
5481 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5482 we need to create 4 vectorized stmts):
5484 before vectorization:
5485 RELATED_STMT VEC_STMT
5489 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5491 RELATED_STMT VEC_STMT
5492 VS1_0: vx0 = memref0 VS1_1 -
5493 VS1_1: vx1 = memref1 VS1_2 -
5494 VS1_2: vx2 = memref2 VS1_3 -
5495 VS1_3: vx3 = memref3 - -
5496 S1: x = load - VS1_0
5499 step2: vectorize stmt S2 (done here):
5500 To vectorize stmt S2 we first need to find the relevant vector
5501 def for the first operand 'x'. This is, as usual, obtained from
5502 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5503 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5504 relevant vector def 'vx0'. Having found 'vx0' we can generate
5505 the vector stmt VS2_0, and as usual, record it in the
5506 STMT_VINFO_VEC_STMT of stmt S2.
5507 When creating the second copy (VS2_1), we obtain the relevant vector
5508 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5509 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5510 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5511 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5512 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5513 chain of stmts and pointers:
5514 RELATED_STMT VEC_STMT
5515 VS1_0: vx0 = memref0 VS1_1 -
5516 VS1_1: vx1 = memref1 VS1_2 -
5517 VS1_2: vx2 = memref2 VS1_3 -
5518 VS1_3: vx3 = memref3 - -
5519 S1: x = load - VS1_0
5520 VS2_0: vz0 = vx0 + v1 VS2_1 -
5521 VS2_1: vz1 = vx1 + v1 VS2_2 -
5522 VS2_2: vz2 = vx2 + v1 VS2_3 -
5523 VS2_3: vz3 = vx3 + v1 - -
5524 S2: z = x + 1 - VS2_0 */
5526 prev_stmt_info
= NULL
;
5527 for (j
= 0; j
< ncopies
; j
++)
5532 if (op_type
== binary_op
|| op_type
== ternary_op
)
5533 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5536 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5538 if (op_type
== ternary_op
)
5539 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5544 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5545 if (op_type
== ternary_op
)
5547 tree vec_oprnd
= vec_oprnds2
.pop ();
5548 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5553 /* Arguments are ready. Create the new vector stmt. */
5554 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5556 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5557 ? vec_oprnds1
[i
] : NULL_TREE
);
5558 vop2
= ((op_type
== ternary_op
)
5559 ? vec_oprnds2
[i
] : NULL_TREE
);
5560 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5561 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5562 gimple_assign_set_lhs (new_stmt
, new_temp
);
5563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Extra VIEW_CONVERT_EXPR for the POINTER_DIFF_EXPR signed result
   (guard condition lines were dropped by the extraction).  */
5566 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
5567 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
5569 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
5570 gimple_assign_set_lhs (new_stmt
, new_temp
);
5571 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5574 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5581 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5583 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5584 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5587 vec_oprnds0
.release ();
5588 vec_oprnds1
.release ();
5589 vec_oprnds2
.release ();
5594 /* A helper function to ensure data reference DR's base alignment. */
/* NOTE(review): damaged extraction — only comments added; code tokens are
   byte-identical to the mangled original (the else-branch structure around
   SET_DECL_ALIGN was dropped by the extraction).

   Raises the alignment of DR's base decl to the target alignment when the
   data-reference was flagged base_misaligned, going through the symtab for
   symtab-registered decls, then clears the flag.  */
5597 ensure_base_align (struct data_reference
*dr
)
5602 if (DR_VECT_AUX (dr
)->base_misaligned
)
5604 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
/* DR_TARGET_ALIGNMENT is in bytes; convert to bits for DECL_ALIGN.  */
5606 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
5608 if (decl_in_symtab_p (base_decl
))
5609 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
5612 SET_DECL_ALIGN (base_decl
, align_base_to
);
/* Mark as user-aligned so later passes do not lower it again.  */
5613 DECL_USER_ALIGN (base_decl
) = 1;
5615 DR_VECT_AUX (dr
)->base_misaligned
= false;
5620 /* Function get_group_alias_ptr_type.
5622 Return the alias type for the group starting at FIRST_STMT. */
/* NOTE(review): damaged extraction — only comments added; code tokens are
   byte-identical to the mangled original (the loop header over the group
   members was dropped by the extraction).

   Walks the interleaving group starting at FIRST_STMT; if all members
   share the first member's alias set, returns that reference's alias
   pointer type, otherwise falls back to ptr_type_node.  */
5625 get_group_alias_ptr_type (gimple
*first_stmt
)
5627 struct data_reference
*first_dr
, *next_dr
;
5630 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5631 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
5634 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
5635 if (get_alias_set (DR_REF (first_dr
))
5636 != get_alias_set (DR_REF (next_dr
)))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_NOTE
, vect_location
,
5640 "conflicting alias set types.\n");
/* Mixed alias sets in the group: use the universal pointer type.  */
5641 return ptr_type_node
;
5643 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5645 return reference_alias_ptr_type (DR_REF (first_dr
));
5649 /* Function vectorizable_store.
5651 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5653 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5654 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5655 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5658 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5664 tree vec_oprnd
= NULL_TREE
;
5665 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5666 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5668 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5669 struct loop
*loop
= NULL
;
5670 machine_mode vec_mode
;
5672 enum dr_alignment_support alignment_support_scheme
;
5674 enum vect_def_type dt
;
5675 stmt_vec_info prev_stmt_info
= NULL
;
5676 tree dataref_ptr
= NULL_TREE
;
5677 tree dataref_offset
= NULL_TREE
;
5678 gimple
*ptr_incr
= NULL
;
5681 gimple
*next_stmt
, *first_stmt
;
5683 unsigned int group_size
, i
;
5684 vec
<tree
> oprnds
= vNULL
;
5685 vec
<tree
> result_chain
= vNULL
;
5687 tree offset
= NULL_TREE
;
5688 vec
<tree
> vec_oprnds
= vNULL
;
5689 bool slp
= (slp_node
!= NULL
);
5690 unsigned int vec_num
;
5691 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5692 vec_info
*vinfo
= stmt_info
->vinfo
;
5694 gather_scatter_info gs_info
;
5695 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5698 vec_load_store_type vls_type
;
5701 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5704 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5708 /* Is vectorizable store? */
5710 if (!is_gimple_assign (stmt
))
5713 scalar_dest
= gimple_assign_lhs (stmt
);
5714 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5715 && is_pattern_stmt_p (stmt_info
))
5716 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5717 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5718 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5719 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5720 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5721 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5722 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5723 && TREE_CODE (scalar_dest
) != MEM_REF
)
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
5730 gcc_assert (gimple_assign_single_p (stmt
));
5732 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
5733 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5737 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5738 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5743 /* Multiple types in SLP are handled by creating the appropriate number of
5744 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5749 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5751 gcc_assert (ncopies
>= 1);
5753 /* FORNOW. This restriction should be relaxed. */
5754 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5756 if (dump_enabled_p ())
5757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5758 "multiple types in nested loop.\n");
5762 op
= gimple_assign_rhs1 (stmt
);
5764 /* In the case this is a store from a constant make sure
5765 native_encode_expr can handle it. */
5766 if (CONSTANT_CLASS_P (op
) && native_encode_expr (op
, NULL
, 64) == 0)
5769 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5773 "use not simple.\n");
5777 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
5778 vls_type
= VLS_STORE_INVARIANT
;
5780 vls_type
= VLS_STORE
;
5782 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
5785 elem_type
= TREE_TYPE (vectype
);
5786 vec_mode
= TYPE_MODE (vectype
);
5788 /* FORNOW. In some cases can vectorize even if data-type not supported
5789 (e.g. - array initialization with 0). */
5790 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5793 if (!STMT_VINFO_DATA_REF (stmt_info
))
5796 vect_memory_access_type memory_access_type
;
5797 if (!get_load_store_type (stmt
, vectype
, slp
, vls_type
, ncopies
,
5798 &memory_access_type
, &gs_info
))
5801 if (!vec_stmt
) /* transformation not required. */
5803 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
5804 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5805 /* The SLP costs are calculated during SLP analysis. */
5806 if (!PURE_SLP_STMT (stmt_info
))
5807 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
, dt
,
5811 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
5815 ensure_base_align (dr
);
5817 if (memory_access_type
== VMAT_GATHER_SCATTER
)
5819 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5820 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
5821 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5822 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5823 edge pe
= loop_preheader_edge (loop
);
5826 enum { NARROW
, NONE
, WIDEN
} modifier
;
5827 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
5829 if (nunits
== (unsigned int) scatter_off_nunits
)
5831 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5835 vec_perm_builder
sel (scatter_off_nunits
, scatter_off_nunits
, 1);
5836 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5837 sel
.quick_push (i
| nunits
);
5839 vec_perm_indices
indices (sel
, 1, scatter_off_nunits
);
5840 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
5842 gcc_assert (perm_mask
!= NULL_TREE
);
5844 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5848 vec_perm_builder
sel (nunits
, nunits
, 1);
5849 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5850 sel
.quick_push (i
| scatter_off_nunits
);
5852 vec_perm_indices
indices (sel
, 2, nunits
);
5853 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
5854 gcc_assert (perm_mask
!= NULL_TREE
);
5860 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
5861 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5862 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5863 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5864 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5865 scaletype
= TREE_VALUE (arglist
);
5867 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5868 && TREE_CODE (rettype
) == VOID_TYPE
);
5870 ptr
= fold_convert (ptrtype
, gs_info
.base
);
5871 if (!is_gimple_min_invariant (ptr
))
5873 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5874 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5875 gcc_assert (!new_bb
);
5878 /* Currently we support only unconditional scatter stores,
5879 so mask should be all ones. */
5880 mask
= build_int_cst (masktype
, -1);
5881 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5883 scale
= build_int_cst (scaletype
, gs_info
.scale
);
5885 prev_stmt_info
= NULL
;
5886 for (j
= 0; j
< ncopies
; ++j
)
5891 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5893 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
5895 else if (modifier
!= NONE
&& (j
& 1))
5897 if (modifier
== WIDEN
)
5900 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5901 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5904 else if (modifier
== NARROW
)
5906 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5909 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5918 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5920 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
5924 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5927 == TYPE_VECTOR_SUBPARTS (srctype
));
5928 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5929 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5930 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5931 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5935 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5937 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5938 == TYPE_VECTOR_SUBPARTS (idxtype
));
5939 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5940 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5941 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5942 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5947 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
5949 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5951 if (prev_stmt_info
== NULL
)
5952 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5955 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5960 grouped_store
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
5963 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5964 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5965 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5967 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5970 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5972 /* We vectorize all the stmts of the interleaving group when we
5973 reach the last stmt in the group. */
5974 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5975 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5984 grouped_store
= false;
5985 /* VEC_NUM is the number of vect stmts to be created for this
5987 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5988 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5989 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5990 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5991 op
= gimple_assign_rhs1 (first_stmt
);
5994 /* VEC_NUM is the number of vect stmts to be created for this
5996 vec_num
= group_size
;
5998 ref_type
= get_group_alias_ptr_type (first_stmt
);
6004 group_size
= vec_num
= 1;
6005 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_NOTE
, vect_location
,
6010 "transform store. ncopies = %d\n", ncopies
);
6012 if (memory_access_type
== VMAT_ELEMENTWISE
6013 || memory_access_type
== VMAT_STRIDED_SLP
)
6015 gimple_stmt_iterator incr_gsi
;
6021 gimple_seq stmts
= NULL
;
6022 tree stride_base
, stride_step
, alias_off
;
6026 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6029 = fold_build_pointer_plus
6030 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
6031 size_binop (PLUS_EXPR
,
6032 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
6033 convert_to_ptrofftype (DR_INIT (first_dr
))));
6034 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
6036 /* For a store with loop-invariant (but other than power-of-2)
6037 stride (i.e. not a grouped access) like so:
6039 for (i = 0; i < n; i += stride)
6042 we generate a new induction variable and new stores from
6043 the components of the (vectorized) rhs:
6045 for (j = 0; ; j += VF*stride)
6050 array[j + stride] = tmp2;
6054 unsigned nstores
= nunits
;
6056 tree ltype
= elem_type
;
6057 tree lvectype
= vectype
;
6060 if (group_size
< nunits
6061 && nunits
% group_size
== 0)
6063 nstores
= nunits
/ group_size
;
6065 ltype
= build_vector_type (elem_type
, group_size
);
6068 /* First check if vec_extract optab doesn't support extraction
6069 of vector elts directly. */
6070 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6072 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6073 || !VECTOR_MODE_P (vmode
)
6074 || (convert_optab_handler (vec_extract_optab
,
6075 TYPE_MODE (vectype
), vmode
)
6076 == CODE_FOR_nothing
))
6078 /* Try to avoid emitting an extract of vector elements
6079 by performing the extracts using an integer type of the
6080 same size, extracting from a vector of those and then
6081 re-interpreting it as the original vector type if
6084 = group_size
* GET_MODE_BITSIZE (elmode
);
6085 elmode
= int_mode_for_size (lsize
, 0).require ();
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode
,
6090 nunits
/ group_size
).exists (&vmode
)
6091 && VECTOR_MODE_P (vmode
)
6092 && (convert_optab_handler (vec_extract_optab
,
6094 != CODE_FOR_nothing
))
6096 nstores
= nunits
/ group_size
;
6098 ltype
= build_nonstandard_integer_type (lsize
, 1);
6099 lvectype
= build_vector_type (ltype
, nstores
);
6101 /* Else fall back to vector extraction anyway.
6102 Fewer stores are more important than avoiding spilling
6103 of the vector we extract from. Compared to the
6104 construction case in vectorizable_load no store-forwarding
6105 issue exists here for reasonable archs. */
6108 else if (group_size
>= nunits
6109 && group_size
% nunits
== 0)
6116 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6117 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6120 ivstep
= stride_step
;
6121 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6122 build_int_cst (TREE_TYPE (ivstep
), vf
));
6124 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6126 create_iv (stride_base
, ivstep
, NULL
,
6127 loop
, &incr_gsi
, insert_after
,
6129 incr
= gsi_stmt (incr_gsi
);
6130 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6132 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6134 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6136 prev_stmt_info
= NULL
;
6137 alias_off
= build_int_cst (ref_type
, 0);
6138 next_stmt
= first_stmt
;
6139 for (g
= 0; g
< group_size
; g
++)
6141 running_off
= offvar
;
6144 tree size
= TYPE_SIZE_UNIT (ltype
);
6145 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6147 tree newoff
= copy_ssa_name (running_off
, NULL
);
6148 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6150 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6151 running_off
= newoff
;
6153 unsigned int group_el
= 0;
6154 unsigned HOST_WIDE_INT
6155 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6156 for (j
= 0; j
< ncopies
; j
++)
6158 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6159 and first_stmt == stmt. */
6164 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6166 vec_oprnd
= vec_oprnds
[0];
6170 gcc_assert (gimple_assign_single_p (next_stmt
));
6171 op
= gimple_assign_rhs1 (next_stmt
);
6172 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6178 vec_oprnd
= vec_oprnds
[j
];
6181 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
6182 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
6185 /* Pun the vector to extract from if necessary. */
6186 if (lvectype
!= vectype
)
6188 tree tem
= make_ssa_name (lvectype
);
6190 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6191 lvectype
, vec_oprnd
));
6192 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6195 for (i
= 0; i
< nstores
; i
++)
6197 tree newref
, newoff
;
6198 gimple
*incr
, *assign
;
6199 tree size
= TYPE_SIZE (ltype
);
6200 /* Extract the i'th component. */
6201 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6202 bitsize_int (i
), size
);
6203 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6206 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6210 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6212 newref
= build2 (MEM_REF
, ltype
,
6213 running_off
, this_off
);
6215 /* And store it to *running_off. */
6216 assign
= gimple_build_assign (newref
, elem
);
6217 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6221 || group_el
== group_size
)
6223 newoff
= copy_ssa_name (running_off
, NULL
);
6224 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6225 running_off
, stride_step
);
6226 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6228 running_off
= newoff
;
6231 if (g
== group_size
- 1
6234 if (j
== 0 && i
== 0)
6235 STMT_VINFO_VEC_STMT (stmt_info
)
6236 = *vec_stmt
= assign
;
6238 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6239 prev_stmt_info
= vinfo_for_stmt (assign
);
6243 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6248 vec_oprnds
.release ();
6252 auto_vec
<tree
> dr_chain (group_size
);
6253 oprnds
.create (group_size
);
6255 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6256 gcc_assert (alignment_support_scheme
);
6257 /* Targets with store-lane instructions must not require explicit
6259 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
6260 || alignment_support_scheme
== dr_aligned
6261 || alignment_support_scheme
== dr_unaligned_supported
);
6263 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6264 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6265 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6267 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6268 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6270 aggr_type
= vectype
;
6272 /* In case the vectorization factor (VF) is bigger than the number
6273 of elements that we can fit in a vectype (nunits), we have to generate
6274 more than one vector stmt - i.e - we need to "unroll" the
6275 vector stmt by a factor VF/nunits. For more details see documentation in
6276 vect_get_vec_def_for_copy_stmt. */
6278 /* In case of interleaving (non-unit grouped access):
6285 We create vectorized stores starting from base address (the access of the
6286 first stmt in the chain (S2 in the above example), when the last store stmt
6287 of the chain (S4) is reached:
6290 VS2: &base + vec_size*1 = vx0
6291 VS3: &base + vec_size*2 = vx1
6292 VS4: &base + vec_size*3 = vx3
6294 Then permutation statements are generated:
6296 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6297 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6300 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6301 (the order of the data-refs in the output of vect_permute_store_chain
6302 corresponds to the order of scalar stmts in the interleaving chain - see
6303 the documentation of vect_permute_store_chain()).
6305 In case of both multiple types and interleaving, above vector stores and
6306 permutation stmts are created for every copy. The result vector stmts are
6307 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6308 STMT_VINFO_RELATED_STMT for the next copies.
6311 prev_stmt_info
= NULL
;
6312 for (j
= 0; j
< ncopies
; j
++)
6319 /* Get vectorized arguments for SLP_NODE. */
6320 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6323 vec_oprnd
= vec_oprnds
[0];
6327 /* For interleaved stores we collect vectorized defs for all the
6328 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6329 used as an input to vect_permute_store_chain(), and OPRNDS as
6330 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6332 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6333 OPRNDS are of size 1. */
6334 next_stmt
= first_stmt
;
6335 for (i
= 0; i
< group_size
; i
++)
6337 /* Since gaps are not supported for interleaved stores,
6338 GROUP_SIZE is the exact number of stmts in the chain.
6339 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6340 there is no interleaving, GROUP_SIZE is 1, and only one
6341 iteration of the loop will be executed. */
6342 gcc_assert (next_stmt
6343 && gimple_assign_single_p (next_stmt
));
6344 op
= gimple_assign_rhs1 (next_stmt
);
6346 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6347 dr_chain
.quick_push (vec_oprnd
);
6348 oprnds
.quick_push (vec_oprnd
);
6349 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6353 /* We should have catched mismatched types earlier. */
6354 gcc_assert (useless_type_conversion_p (vectype
,
6355 TREE_TYPE (vec_oprnd
)));
6356 bool simd_lane_access_p
6357 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6358 if (simd_lane_access_p
6359 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6360 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6361 && integer_zerop (DR_OFFSET (first_dr
))
6362 && integer_zerop (DR_INIT (first_dr
))
6363 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6364 get_alias_set (TREE_TYPE (ref_type
))))
6366 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6367 dataref_offset
= build_int_cst (ref_type
, 0);
6372 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6373 simd_lane_access_p
? loop
: NULL
,
6374 offset
, &dummy
, gsi
, &ptr_incr
,
6375 simd_lane_access_p
, &inv_p
);
6376 gcc_assert (bb_vinfo
|| !inv_p
);
6380 /* For interleaved stores we created vectorized defs for all the
6381 defs stored in OPRNDS in the previous iteration (previous copy).
6382 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6383 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6385 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6386 OPRNDS are of size 1. */
6387 for (i
= 0; i
< group_size
; i
++)
6390 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
6391 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
6392 dr_chain
[i
] = vec_oprnd
;
6393 oprnds
[i
] = vec_oprnd
;
6397 = int_const_binop (PLUS_EXPR
, dataref_offset
,
6398 TYPE_SIZE_UNIT (aggr_type
));
6400 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6401 TYPE_SIZE_UNIT (aggr_type
));
6404 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6408 /* Combine all the vectors into an array. */
6409 vec_array
= create_vector_array (vectype
, vec_num
);
6410 for (i
= 0; i
< vec_num
; i
++)
6412 vec_oprnd
= dr_chain
[i
];
6413 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6417 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6418 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6419 gcall
*call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6421 gimple_call_set_lhs (call
, data_ref
);
6422 gimple_call_set_nothrow (call
, true);
6424 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6432 result_chain
.create (group_size
);
6434 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6438 next_stmt
= first_stmt
;
6439 for (i
= 0; i
< vec_num
; i
++)
6441 unsigned align
, misalign
;
6444 /* Bump the vector pointer. */
6445 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6449 vec_oprnd
= vec_oprnds
[i
];
6450 else if (grouped_store
)
6451 /* For grouped stores vectorized defs are interleaved in
6452 vect_permute_store_chain(). */
6453 vec_oprnd
= result_chain
[i
];
6455 data_ref
= fold_build2 (MEM_REF
, vectype
,
6459 : build_int_cst (ref_type
, 0));
6460 align
= DR_TARGET_ALIGNMENT (first_dr
);
6461 if (aligned_access_p (first_dr
))
6463 else if (DR_MISALIGNMENT (first_dr
) == -1)
6465 align
= dr_alignment (vect_dr_behavior (first_dr
));
6467 TREE_TYPE (data_ref
)
6468 = build_aligned_type (TREE_TYPE (data_ref
),
6469 align
* BITS_PER_UNIT
);
6473 TREE_TYPE (data_ref
)
6474 = build_aligned_type (TREE_TYPE (data_ref
),
6475 TYPE_ALIGN (elem_type
));
6476 misalign
= DR_MISALIGNMENT (first_dr
);
6478 if (dataref_offset
== NULL_TREE
6479 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6480 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6483 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6485 tree perm_mask
= perm_mask_for_reverse (vectype
);
6487 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6489 tree new_temp
= make_ssa_name (perm_dest
);
6491 /* Generate the permute statement. */
6493 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6494 vec_oprnd
, perm_mask
);
6495 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6497 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6498 vec_oprnd
= new_temp
;
6501 /* Arguments are ready. Create the new vector stmt. */
6502 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6503 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6508 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6516 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6519 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6524 result_chain
.release ();
6525 vec_oprnds
.release ();
6530 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6531 VECTOR_CST mask. No checks are made that the target platform supports the
6532 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6533 vect_gen_perm_mask_checked. */
6536 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
6540 unsigned int nunits
= sel
.length ();
6541 gcc_assert (nunits
== TYPE_VECTOR_SUBPARTS (vectype
));
6543 mask_type
= build_vector_type (ssizetype
, nunits
);
6544 return vec_perm_indices_to_tree (mask_type
, sel
);
6547 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6548 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6551 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
6553 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
6554 return vect_gen_perm_mask_any (vectype
, sel
);
6557 /* Given a vector variable X and Y, that was generated for the scalar
6558 STMT, generate instructions to permute the vector elements of X and Y
6559 using permutation mask MASK_VEC, insert them at *GSI and return the
6560 permuted vector variable. */
6563 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6564 gimple_stmt_iterator
*gsi
)
6566 tree vectype
= TREE_TYPE (x
);
6567 tree perm_dest
, data_ref
;
6570 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6571 data_ref
= make_ssa_name (perm_dest
);
6573 /* Generate the permute statement. */
6574 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6575 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6580 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6581 inserting them on the loops preheader edge. Returns true if we
6582 were successful in doing so (and thus STMT can be moved then),
6583 otherwise returns false. */
6586 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
6592 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6594 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6595 if (!gimple_nop_p (def_stmt
)
6596 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6598 /* Make sure we don't need to recurse. While we could do
6599 so in simple cases when there are more complex use webs
6600 we don't have an easy way to preserve stmt order to fulfil
6601 dependencies within them. */
6604 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
6606 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
6608 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
6609 if (!gimple_nop_p (def_stmt2
)
6610 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
6620 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
6622 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
6623 if (!gimple_nop_p (def_stmt
)
6624 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
6626 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6627 gsi_remove (&gsi
, false);
6628 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
6635 /* vectorizable_load.
6637 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6639 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6640 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6641 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6644 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6645 slp_tree slp_node
, slp_instance slp_node_instance
)
6648 tree vec_dest
= NULL
;
6649 tree data_ref
= NULL
;
6650 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6651 stmt_vec_info prev_stmt_info
;
6652 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6653 struct loop
*loop
= NULL
;
6654 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6655 bool nested_in_vect_loop
= false;
6656 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6660 gimple
*new_stmt
= NULL
;
6662 enum dr_alignment_support alignment_support_scheme
;
6663 tree dataref_ptr
= NULL_TREE
;
6664 tree dataref_offset
= NULL_TREE
;
6665 gimple
*ptr_incr
= NULL
;
6667 int i
, j
, group_size
, group_gap_adj
;
6668 tree msq
= NULL_TREE
, lsq
;
6669 tree offset
= NULL_TREE
;
6670 tree byte_offset
= NULL_TREE
;
6671 tree realignment_token
= NULL_TREE
;
6673 vec
<tree
> dr_chain
= vNULL
;
6674 bool grouped_load
= false;
6676 gimple
*first_stmt_for_drptr
= NULL
;
6678 bool compute_in_loop
= false;
6679 struct loop
*at_loop
;
6681 bool slp
= (slp_node
!= NULL
);
6682 bool slp_perm
= false;
6683 enum tree_code code
;
6684 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6687 gather_scatter_info gs_info
;
6688 vec_info
*vinfo
= stmt_info
->vinfo
;
6691 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6694 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6698 /* Is vectorizable load? */
6699 if (!is_gimple_assign (stmt
))
6702 scalar_dest
= gimple_assign_lhs (stmt
);
6703 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6706 code
= gimple_assign_rhs_code (stmt
);
6707 if (code
!= ARRAY_REF
6708 && code
!= BIT_FIELD_REF
6709 && code
!= INDIRECT_REF
6710 && code
!= COMPONENT_REF
6711 && code
!= IMAGPART_EXPR
6712 && code
!= REALPART_EXPR
6714 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6717 if (!STMT_VINFO_DATA_REF (stmt_info
))
6720 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6721 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6725 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6726 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6727 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6732 /* Multiple types in SLP are handled by creating the appropriate number of
6733 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6738 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6740 gcc_assert (ncopies
>= 1);
6742 /* FORNOW. This restriction should be relaxed. */
6743 if (nested_in_vect_loop
&& ncopies
> 1)
6745 if (dump_enabled_p ())
6746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6747 "multiple types in nested loop.\n");
6751 /* Invalidate assumptions made by dependence analysis when vectorization
6752 on the unrolled body effectively re-orders stmts. */
6754 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6755 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6756 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6758 if (dump_enabled_p ())
6759 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6760 "cannot perform implicit CSE when unrolling "
6761 "with negative dependence distance\n");
6765 elem_type
= TREE_TYPE (vectype
);
6766 mode
= TYPE_MODE (vectype
);
6768 /* FORNOW. In some cases can vectorize even if data-type not supported
6769 (e.g. - data copies). */
6770 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6772 if (dump_enabled_p ())
6773 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6774 "Aligned load, but unsupported type.\n");
6778 /* Check if the load is a part of an interleaving chain. */
6779 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6781 grouped_load
= true;
6783 gcc_assert (!nested_in_vect_loop
);
6784 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6786 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6787 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6789 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6792 /* Invalidate assumptions made by dependence analysis when vectorization
6793 on the unrolled body effectively re-orders stmts. */
6794 if (!PURE_SLP_STMT (stmt_info
)
6795 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6796 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6797 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6799 if (dump_enabled_p ())
6800 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6801 "cannot perform implicit CSE when performing "
6802 "group loads with negative dependence distance\n");
6806 /* Similarly when the stmt is a load that is both part of a SLP
6807 instance and a loop vectorized stmt via the same-dr mechanism
6808 we have to give up. */
6809 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6810 && (STMT_SLP_TYPE (stmt_info
)
6811 != STMT_SLP_TYPE (vinfo_for_stmt
6812 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6814 if (dump_enabled_p ())
6815 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6816 "conflicting SLP types for CSEd load\n");
6821 vect_memory_access_type memory_access_type
;
6822 if (!get_load_store_type (stmt
, vectype
, slp
, VLS_LOAD
, ncopies
,
6823 &memory_access_type
, &gs_info
))
6826 if (!vec_stmt
) /* transformation not required. */
6829 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6830 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6831 /* The SLP costs are calculated during SLP analysis. */
6832 if (!PURE_SLP_STMT (stmt_info
))
6833 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
6839 gcc_assert (memory_access_type
6840 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6842 if (dump_enabled_p ())
6843 dump_printf_loc (MSG_NOTE
, vect_location
,
6844 "transform load. ncopies = %d\n", ncopies
);
6848 ensure_base_align (dr
);
6850 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6852 tree vec_oprnd0
= NULL_TREE
, op
;
6853 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6854 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6855 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6856 edge pe
= loop_preheader_edge (loop
);
6859 enum { NARROW
, NONE
, WIDEN
} modifier
;
6860 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6862 if (nunits
== gather_off_nunits
)
6864 else if (nunits
== gather_off_nunits
/ 2)
6868 vec_perm_builder
sel (gather_off_nunits
, gather_off_nunits
, 1);
6869 for (i
= 0; i
< gather_off_nunits
; ++i
)
6870 sel
.quick_push (i
| nunits
);
6872 vec_perm_indices
indices (sel
, 1, gather_off_nunits
);
6873 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6876 else if (nunits
== gather_off_nunits
* 2)
6880 vec_perm_builder
sel (nunits
, nunits
, 1);
6881 for (i
= 0; i
< nunits
; ++i
)
6882 sel
.quick_push (i
< gather_off_nunits
6883 ? i
: i
+ nunits
- gather_off_nunits
);
6885 vec_perm_indices
indices (sel
, 2, nunits
);
6886 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6892 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6893 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6894 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6895 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6896 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6897 scaletype
= TREE_VALUE (arglist
);
6898 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6900 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6902 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6903 if (!is_gimple_min_invariant (ptr
))
6905 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6906 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6907 gcc_assert (!new_bb
);
6910 /* Currently we support only unconditional gather loads,
6911 so mask should be all ones. */
6912 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6913 mask
= build_int_cst (masktype
, -1);
6914 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6916 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6917 mask
= build_vector_from_val (masktype
, mask
);
6918 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6920 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6924 for (j
= 0; j
< 6; ++j
)
6926 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6927 mask
= build_real (TREE_TYPE (masktype
), r
);
6928 mask
= build_vector_from_val (masktype
, mask
);
6929 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6934 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6936 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6937 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6938 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6942 for (j
= 0; j
< 6; ++j
)
6944 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6945 merge
= build_real (TREE_TYPE (rettype
), r
);
6949 merge
= build_vector_from_val (rettype
, merge
);
6950 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6952 prev_stmt_info
= NULL
;
6953 for (j
= 0; j
< ncopies
; ++j
)
6955 if (modifier
== WIDEN
&& (j
& 1))
6956 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6957 perm_mask
, stmt
, gsi
);
6960 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6963 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
, vec_oprnd0
);
6965 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6967 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6968 == TYPE_VECTOR_SUBPARTS (idxtype
));
6969 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6970 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6972 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6973 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6978 = gimple_build_call (gs_info
.decl
, 5, merge
, ptr
, op
, mask
, scale
);
6980 if (!useless_type_conversion_p (vectype
, rettype
))
6982 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6983 == TYPE_VECTOR_SUBPARTS (rettype
));
6984 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6985 gimple_call_set_lhs (new_stmt
, op
);
6986 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6987 var
= make_ssa_name (vec_dest
);
6988 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6990 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6994 var
= make_ssa_name (vec_dest
, new_stmt
);
6995 gimple_call_set_lhs (new_stmt
, var
);
6998 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7000 if (modifier
== NARROW
)
7007 var
= permute_vec_elements (prev_res
, var
,
7008 perm_mask
, stmt
, gsi
);
7009 new_stmt
= SSA_NAME_DEF_STMT (var
);
7012 if (prev_stmt_info
== NULL
)
7013 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7015 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7016 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7021 if (memory_access_type
== VMAT_ELEMENTWISE
7022 || memory_access_type
== VMAT_STRIDED_SLP
)
7024 gimple_stmt_iterator incr_gsi
;
7030 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7031 gimple_seq stmts
= NULL
;
7032 tree stride_base
, stride_step
, alias_off
;
7034 gcc_assert (!nested_in_vect_loop
);
7036 if (slp
&& grouped_load
)
7038 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7039 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7040 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7041 ref_type
= get_group_alias_ptr_type (first_stmt
);
7048 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7052 = fold_build_pointer_plus
7053 (DR_BASE_ADDRESS (first_dr
),
7054 size_binop (PLUS_EXPR
,
7055 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7056 convert_to_ptrofftype (DR_INIT (first_dr
))));
7057 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7059 /* For a load with loop-invariant (but other than power-of-2)
7060 stride (i.e. not a grouped access) like so:
7062 for (i = 0; i < n; i += stride)
7065 we generate a new induction variable and new accesses to
7066 form a new vector (or vectors, depending on ncopies):
7068 for (j = 0; ; j += VF*stride)
7070 tmp2 = array[j + stride];
7072 vectemp = {tmp1, tmp2, ...}
7075 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7076 build_int_cst (TREE_TYPE (stride_step
), vf
));
7078 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7080 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
7081 loop
, &incr_gsi
, insert_after
,
7083 incr
= gsi_stmt (incr_gsi
);
7084 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7086 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
7087 &stmts
, true, NULL_TREE
);
7089 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
7091 prev_stmt_info
= NULL
;
7092 running_off
= offvar
;
7093 alias_off
= build_int_cst (ref_type
, 0);
7094 int nloads
= nunits
;
7096 tree ltype
= TREE_TYPE (vectype
);
7097 tree lvectype
= vectype
;
7098 auto_vec
<tree
> dr_chain
;
7099 if (memory_access_type
== VMAT_STRIDED_SLP
)
7101 if (group_size
< nunits
)
7103 /* First check if vec_init optab supports construction from
7104 vector elts directly. */
7105 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7107 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7108 && VECTOR_MODE_P (vmode
)
7109 && (convert_optab_handler (vec_init_optab
,
7110 TYPE_MODE (vectype
), vmode
)
7111 != CODE_FOR_nothing
))
7113 nloads
= nunits
/ group_size
;
7115 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7119 /* Otherwise avoid emitting a constructor of vector elements
7120 by performing the loads using an integer type of the same
7121 size, constructing a vector of those and then
7122 re-interpreting it as the original vector type.
7123 This avoids a huge runtime penalty due to the general
7124 inability to perform store forwarding from smaller stores
7125 to a larger load. */
7127 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7128 elmode
= int_mode_for_size (lsize
, 0).require ();
7129 /* If we can't construct such a vector fall back to
7130 element loads of the original vector type. */
7131 if (mode_for_vector (elmode
,
7132 nunits
/ group_size
).exists (&vmode
)
7133 && VECTOR_MODE_P (vmode
)
7134 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7135 != CODE_FOR_nothing
))
7137 nloads
= nunits
/ group_size
;
7139 ltype
= build_nonstandard_integer_type (lsize
, 1);
7140 lvectype
= build_vector_type (ltype
, nloads
);
7150 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7154 /* For SLP permutation support we need to load the whole group,
7155 not only the number of vector stmts the permutation result
7159 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
7160 dr_chain
.create (ncopies
);
7163 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7166 unsigned HOST_WIDE_INT
7167 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7168 for (j
= 0; j
< ncopies
; j
++)
7171 vec_alloc (v
, nloads
);
7172 for (i
= 0; i
< nloads
; i
++)
7174 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7176 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
7177 build2 (MEM_REF
, ltype
,
7178 running_off
, this_off
));
7179 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7181 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7182 gimple_assign_lhs (new_stmt
));
7186 || group_el
== group_size
)
7188 tree newoff
= copy_ssa_name (running_off
);
7189 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7190 running_off
, stride_step
);
7191 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7193 running_off
= newoff
;
7199 tree vec_inv
= build_constructor (lvectype
, v
);
7200 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7201 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7202 if (lvectype
!= vectype
)
7204 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7206 build1 (VIEW_CONVERT_EXPR
,
7207 vectype
, new_temp
));
7208 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7215 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7217 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7222 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7224 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7225 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7231 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7232 slp_node_instance
, false, &n_perms
);
7239 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7240 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7241 /* For SLP vectorization we directly vectorize a subchain
7242 without permutation. */
7243 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7244 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7245 /* For BB vectorization always use the first stmt to base
7246 the data ref pointer on. */
7248 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7250 /* Check if the chain of loads is already vectorized. */
7251 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7252 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7253 ??? But we can only do so if there is exactly one
7254 as we have no way to get at the rest. Leave the CSE
7256 ??? With the group load eventually participating
7257 in multiple different permutations (having multiple
7258 slp nodes which refer to the same group) the CSE
7259 is even wrong code. See PR56270. */
7262 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7265 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7268 /* VEC_NUM is the number of vect stmts to be created for this group. */
7271 grouped_load
= false;
7272 /* For SLP permutation support we need to load the whole group,
7273 not only the number of vector stmts the permutation result
7277 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
7278 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7282 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7284 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7288 vec_num
= group_size
;
7290 ref_type
= get_group_alias_ptr_type (first_stmt
);
7296 group_size
= vec_num
= 1;
7298 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7301 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7302 gcc_assert (alignment_support_scheme
);
7303 /* Targets with load-lane instructions must not require explicit
7305 gcc_assert (memory_access_type
!= VMAT_LOAD_STORE_LANES
7306 || alignment_support_scheme
== dr_aligned
7307 || alignment_support_scheme
== dr_unaligned_supported
);
7309 /* In case the vectorization factor (VF) is bigger than the number
7310 of elements that we can fit in a vectype (nunits), we have to generate
7311 more than one vector stmt - i.e - we need to "unroll" the
7312 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7313 from one copy of the vector stmt to the next, in the field
7314 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7315 stages to find the correct vector defs to be used when vectorizing
7316 stmts that use the defs of the current stmt. The example below
7317 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7318 need to create 4 vectorized stmts):
7320 before vectorization:
7321 RELATED_STMT VEC_STMT
7325 step 1: vectorize stmt S1:
7326 We first create the vector stmt VS1_0, and, as usual, record a
7327 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7328 Next, we create the vector stmt VS1_1, and record a pointer to
7329 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7330 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7332 RELATED_STMT VEC_STMT
7333 VS1_0: vx0 = memref0 VS1_1 -
7334 VS1_1: vx1 = memref1 VS1_2 -
7335 VS1_2: vx2 = memref2 VS1_3 -
7336 VS1_3: vx3 = memref3 - -
7337 S1: x = load - VS1_0
7340 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7341 information we recorded in RELATED_STMT field is used to vectorize
7344 /* In case of interleaving (non-unit grouped access):
7351 Vectorized loads are created in the order of memory accesses
7352 starting from the access of the first stmt of the chain:
7355 VS2: vx1 = &base + vec_size*1
7356 VS3: vx3 = &base + vec_size*2
7357 VS4: vx4 = &base + vec_size*3
7359 Then permutation statements are generated:
7361 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7362 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7365 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7366 (the order of the data-refs in the output of vect_permute_load_chain
7367 corresponds to the order of scalar stmts in the interleaving chain - see
7368 the documentation of vect_permute_load_chain()).
7369 The generation of permutation stmts and recording them in
7370 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7372 In case of both multiple types and interleaving, the vector loads and
7373 permutation stmts above are created for every copy. The result vector
7374 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7375 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7377 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7378 on a target that supports unaligned accesses (dr_unaligned_supported)
7379 we generate the following code:
7383 p = p + indx * vectype_size;
7388 Otherwise, the data reference is potentially unaligned on a target that
7389 does not support unaligned accesses (dr_explicit_realign_optimized) -
7390 then generate the following code, in which the data in each iteration is
7391 obtained by two vector loads, one from the previous iteration, and one
7392 from the current iteration:
7394 msq_init = *(floor(p1))
7395 p2 = initial_addr + VS - 1;
7396 realignment_token = call target_builtin;
7399 p2 = p2 + indx * vectype_size
7401 vec_dest = realign_load (msq, lsq, realignment_token)
7406 /* If the misalignment remains the same throughout the execution of the
7407 loop, we can create the init_addr and permutation mask at the loop
7408 preheader. Otherwise, it needs to be created inside the loop.
7409 This can only occur when vectorizing memory accesses in the inner-loop
7410 nested within an outer-loop that is being vectorized. */
7412 if (nested_in_vect_loop
7413 && (DR_STEP_ALIGNMENT (dr
) % GET_MODE_SIZE (TYPE_MODE (vectype
))) != 0)
7415 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7416 compute_in_loop
= true;
7419 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7420 || alignment_support_scheme
== dr_explicit_realign
)
7421 && !compute_in_loop
)
7423 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7424 alignment_support_scheme
, NULL_TREE
,
7426 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7428 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7429 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7436 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7437 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7439 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7440 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7442 aggr_type
= vectype
;
7444 prev_stmt_info
= NULL
;
7446 for (j
= 0; j
< ncopies
; j
++)
7448 /* 1. Create the vector or array pointer update chain. */
7451 bool simd_lane_access_p
7452 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7453 if (simd_lane_access_p
7454 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7455 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7456 && integer_zerop (DR_OFFSET (first_dr
))
7457 && integer_zerop (DR_INIT (first_dr
))
7458 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7459 get_alias_set (TREE_TYPE (ref_type
)))
7460 && (alignment_support_scheme
== dr_aligned
7461 || alignment_support_scheme
== dr_unaligned_supported
))
7463 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7464 dataref_offset
= build_int_cst (ref_type
, 0);
7467 else if (first_stmt_for_drptr
7468 && first_stmt
!= first_stmt_for_drptr
)
7471 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7472 at_loop
, offset
, &dummy
, gsi
,
7473 &ptr_incr
, simd_lane_access_p
,
7474 &inv_p
, byte_offset
);
7475 /* Adjust the pointer by the difference to first_stmt. */
7476 data_reference_p ptrdr
7477 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7478 tree diff
= fold_convert (sizetype
,
7479 size_binop (MINUS_EXPR
,
7482 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7487 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7488 offset
, &dummy
, gsi
, &ptr_incr
,
7489 simd_lane_access_p
, &inv_p
,
7492 else if (dataref_offset
)
7493 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7494 TYPE_SIZE_UNIT (aggr_type
));
7496 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7497 TYPE_SIZE_UNIT (aggr_type
));
7499 if (grouped_load
|| slp_perm
)
7500 dr_chain
.create (vec_num
);
7502 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7506 vec_array
= create_vector_array (vectype
, vec_num
);
7509 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7510 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7511 gcall
*call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1,
7513 gimple_call_set_lhs (call
, vec_array
);
7514 gimple_call_set_nothrow (call
, true);
7516 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7518 /* Extract each vector into an SSA_NAME. */
7519 for (i
= 0; i
< vec_num
; i
++)
7521 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7523 dr_chain
.quick_push (new_temp
);
7526 /* Record the mapping between SSA_NAMEs and statements. */
7527 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7531 for (i
= 0; i
< vec_num
; i
++)
7534 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7537 /* 2. Create the vector-load in the loop. */
7538 switch (alignment_support_scheme
)
7541 case dr_unaligned_supported
:
7543 unsigned int align
, misalign
;
7546 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7549 : build_int_cst (ref_type
, 0));
7550 align
= DR_TARGET_ALIGNMENT (dr
);
7551 if (alignment_support_scheme
== dr_aligned
)
7553 gcc_assert (aligned_access_p (first_dr
));
7556 else if (DR_MISALIGNMENT (first_dr
) == -1)
7558 align
= dr_alignment (vect_dr_behavior (first_dr
));
7560 TREE_TYPE (data_ref
)
7561 = build_aligned_type (TREE_TYPE (data_ref
),
7562 align
* BITS_PER_UNIT
);
7566 TREE_TYPE (data_ref
)
7567 = build_aligned_type (TREE_TYPE (data_ref
),
7568 TYPE_ALIGN (elem_type
));
7569 misalign
= DR_MISALIGNMENT (first_dr
);
7571 if (dataref_offset
== NULL_TREE
7572 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7573 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7577 case dr_explicit_realign
:
7581 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7583 if (compute_in_loop
)
7584 msq
= vect_setup_realignment (first_stmt
, gsi
,
7586 dr_explicit_realign
,
7589 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7590 ptr
= copy_ssa_name (dataref_ptr
);
7592 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7593 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7594 new_stmt
= gimple_build_assign
7595 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7597 (TREE_TYPE (dataref_ptr
),
7598 -(HOST_WIDE_INT
) align
));
7599 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7601 = build2 (MEM_REF
, vectype
, ptr
,
7602 build_int_cst (ref_type
, 0));
7603 vec_dest
= vect_create_destination_var (scalar_dest
,
7605 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7606 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7607 gimple_assign_set_lhs (new_stmt
, new_temp
);
7608 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7609 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7610 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7613 bump
= size_binop (MULT_EXPR
, vs
,
7614 TYPE_SIZE_UNIT (elem_type
));
7615 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7616 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7617 new_stmt
= gimple_build_assign
7618 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7620 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
7621 ptr
= copy_ssa_name (ptr
, new_stmt
);
7622 gimple_assign_set_lhs (new_stmt
, ptr
);
7623 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7625 = build2 (MEM_REF
, vectype
, ptr
,
7626 build_int_cst (ref_type
, 0));
7629 case dr_explicit_realign_optimized
:
7631 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7632 new_temp
= copy_ssa_name (dataref_ptr
);
7634 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7635 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
7636 new_stmt
= gimple_build_assign
7637 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7638 build_int_cst (TREE_TYPE (dataref_ptr
),
7639 -(HOST_WIDE_INT
) align
));
7640 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7642 = build2 (MEM_REF
, vectype
, new_temp
,
7643 build_int_cst (ref_type
, 0));
7649 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7650 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7651 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7652 gimple_assign_set_lhs (new_stmt
, new_temp
);
7653 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7655 /* 3. Handle explicit realignment if necessary/supported.
7657 vec_dest = realign_load (msq, lsq, realignment_token) */
7658 if (alignment_support_scheme
== dr_explicit_realign_optimized
7659 || alignment_support_scheme
== dr_explicit_realign
)
7661 lsq
= gimple_assign_lhs (new_stmt
);
7662 if (!realignment_token
)
7663 realignment_token
= dataref_ptr
;
7664 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7665 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7666 msq
, lsq
, realignment_token
);
7667 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7668 gimple_assign_set_lhs (new_stmt
, new_temp
);
7669 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7671 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7674 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7675 add_phi_arg (phi
, lsq
,
7676 loop_latch_edge (containing_loop
),
7682 /* 4. Handle invariant-load. */
7683 if (inv_p
&& !bb_vinfo
)
7685 gcc_assert (!grouped_load
);
7686 /* If we have versioned for aliasing or the loop doesn't
7687 have any data dependencies that would preclude this,
7688 then we are sure this is a loop invariant load and
7689 thus we can insert it on the preheader edge. */
7690 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7691 && !nested_in_vect_loop
7692 && hoist_defs_of_uses (stmt
, loop
))
7694 if (dump_enabled_p ())
7696 dump_printf_loc (MSG_NOTE
, vect_location
,
7697 "hoisting out of the vectorized "
7699 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7701 tree tem
= copy_ssa_name (scalar_dest
);
7702 gsi_insert_on_edge_immediate
7703 (loop_preheader_edge (loop
),
7704 gimple_build_assign (tem
,
7706 (gimple_assign_rhs1 (stmt
))));
7707 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7708 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7709 set_vinfo_for_stmt (new_stmt
,
7710 new_stmt_vec_info (new_stmt
, vinfo
));
7714 gimple_stmt_iterator gsi2
= *gsi
;
7716 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7718 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7722 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7724 tree perm_mask
= perm_mask_for_reverse (vectype
);
7725 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7726 perm_mask
, stmt
, gsi
);
7727 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7730 /* Collect vector loads and later create their permutation in
7731 vect_transform_grouped_load (). */
7732 if (grouped_load
|| slp_perm
)
7733 dr_chain
.quick_push (new_temp
);
7735 /* Store vector loads in the corresponding SLP_NODE. */
7736 if (slp
&& !slp_perm
)
7737 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7739 /* With SLP permutation we load the gaps as well, without
7740 we need to skip the gaps after we manage to fully load
7741 all elements. group_gap_adj is GROUP_SIZE here. */
7742 group_elt
+= nunits
;
7743 if (group_gap_adj
!= 0 && ! slp_perm
7744 && group_elt
== group_size
- group_gap_adj
)
7746 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7748 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7749 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7754 /* Bump the vector pointer to account for a gap or for excess
7755 elements loaded for a permuted SLP load. */
7756 if (group_gap_adj
!= 0 && slp_perm
)
7758 wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
7760 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
7761 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7766 if (slp
&& !slp_perm
)
7772 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7773 slp_node_instance
, false,
7776 dr_chain
.release ();
7784 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
7785 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7786 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7791 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7793 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7794 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7797 dr_chain
.release ();
7803 /* Function vect_is_simple_cond.
7806 LOOP - the loop that is being vectorized.
7807 COND - Condition that is checked for simple use.
7810 *COMP_VECTYPE - the vector type for the comparison.
7811 *DTS - The def types for the arguments of the comparison
7813 Returns whether a COND can be vectorized. Checks whether
7814 condition operands are supportable using vec_is_simple_use. */
7817 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
7818 tree
*comp_vectype
, enum vect_def_type
*dts
,
7822 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7825 if (TREE_CODE (cond
) == SSA_NAME
7826 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
7828 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7829 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7830 &dts
[0], comp_vectype
)
7832 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7837 if (!COMPARISON_CLASS_P (cond
))
7840 lhs
= TREE_OPERAND (cond
, 0);
7841 rhs
= TREE_OPERAND (cond
, 1);
7843 if (TREE_CODE (lhs
) == SSA_NAME
)
7845 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7846 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
7849 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
7850 || TREE_CODE (lhs
) == FIXED_CST
)
7851 dts
[0] = vect_constant_def
;
7855 if (TREE_CODE (rhs
) == SSA_NAME
)
7857 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7858 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
7861 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
7862 || TREE_CODE (rhs
) == FIXED_CST
)
7863 dts
[1] = vect_constant_def
;
7867 if (vectype1
&& vectype2
7868 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7871 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7872 /* Invariant comparison. */
7873 if (! *comp_vectype
)
7875 tree scalar_type
= TREE_TYPE (lhs
);
7876 /* If we can widen the comparison to match vectype do so. */
7877 if (INTEGRAL_TYPE_P (scalar_type
)
7878 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
7879 TYPE_SIZE (TREE_TYPE (vectype
))))
7880 scalar_type
= build_nonstandard_integer_type
7881 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
7882 TYPE_UNSIGNED (scalar_type
));
7883 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
7889 /* vectorizable_condition.
7891 Check if STMT is conditional modify expression that can be vectorized.
7892 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7893 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7896 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7897 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7898 else clause if it is 2).
7900 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7903 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7904 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7907 tree scalar_dest
= NULL_TREE
;
7908 tree vec_dest
= NULL_TREE
;
7909 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
7910 tree then_clause
, else_clause
;
7911 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7912 tree comp_vectype
= NULL_TREE
;
7913 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7914 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7917 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7918 enum vect_def_type dts
[4]
7919 = {vect_unknown_def_type
, vect_unknown_def_type
,
7920 vect_unknown_def_type
, vect_unknown_def_type
};
7923 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
7924 stmt_vec_info prev_stmt_info
= NULL
;
7926 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7927 vec
<tree
> vec_oprnds0
= vNULL
;
7928 vec
<tree
> vec_oprnds1
= vNULL
;
7929 vec
<tree
> vec_oprnds2
= vNULL
;
7930 vec
<tree
> vec_oprnds3
= vNULL
;
7932 bool masked
= false;
7934 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7937 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7939 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7942 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7943 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7947 /* FORNOW: not yet supported. */
7948 if (STMT_VINFO_LIVE_P (stmt_info
))
7950 if (dump_enabled_p ())
7951 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7952 "value used after loop.\n");
7957 /* Is vectorizable conditional operation? */
7958 if (!is_gimple_assign (stmt
))
7961 code
= gimple_assign_rhs_code (stmt
);
7963 if (code
!= COND_EXPR
)
7966 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7967 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7972 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7974 gcc_assert (ncopies
>= 1);
7975 if (reduc_index
&& ncopies
> 1)
7976 return false; /* FORNOW */
7978 cond_expr
= gimple_assign_rhs1 (stmt
);
7979 then_clause
= gimple_assign_rhs2 (stmt
);
7980 else_clause
= gimple_assign_rhs3 (stmt
);
7982 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
7983 &comp_vectype
, &dts
[0], vectype
)
7988 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
7991 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
7995 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
7998 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8001 masked
= !COMPARISON_CLASS_P (cond_expr
);
8002 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8004 if (vec_cmp_type
== NULL_TREE
)
8007 cond_code
= TREE_CODE (cond_expr
);
8010 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8011 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8014 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8016 /* Boolean values may have another representation in vectors
8017 and therefore we prefer bit operations over comparison for
8018 them (which also works for scalar masks). We store opcodes
8019 to use in bitop1 and bitop2. Statement is vectorized as
8020 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8021 depending on bitop1 and bitop2 arity. */
8025 bitop1
= BIT_NOT_EXPR
;
8026 bitop2
= BIT_AND_EXPR
;
8029 bitop1
= BIT_NOT_EXPR
;
8030 bitop2
= BIT_IOR_EXPR
;
8033 bitop1
= BIT_NOT_EXPR
;
8034 bitop2
= BIT_AND_EXPR
;
8035 std::swap (cond_expr0
, cond_expr1
);
8038 bitop1
= BIT_NOT_EXPR
;
8039 bitop2
= BIT_IOR_EXPR
;
8040 std::swap (cond_expr0
, cond_expr1
);
8043 bitop1
= BIT_XOR_EXPR
;
8046 bitop1
= BIT_XOR_EXPR
;
8047 bitop2
= BIT_NOT_EXPR
;
8052 cond_code
= SSA_NAME
;
8057 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8058 if (bitop1
!= NOP_EXPR
)
8060 machine_mode mode
= TYPE_MODE (comp_vectype
);
8063 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8064 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8067 if (bitop2
!= NOP_EXPR
)
8069 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8071 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8075 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8078 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8088 vec_oprnds0
.create (1);
8089 vec_oprnds1
.create (1);
8090 vec_oprnds2
.create (1);
8091 vec_oprnds3
.create (1);
8095 scalar_dest
= gimple_assign_lhs (stmt
);
8096 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8098 /* Handle cond expr. */
8099 for (j
= 0; j
< ncopies
; j
++)
8101 gassign
*new_stmt
= NULL
;
8106 auto_vec
<tree
, 4> ops
;
8107 auto_vec
<vec
<tree
>, 4> vec_defs
;
8110 ops
.safe_push (cond_expr
);
8113 ops
.safe_push (cond_expr0
);
8114 ops
.safe_push (cond_expr1
);
8116 ops
.safe_push (then_clause
);
8117 ops
.safe_push (else_clause
);
8118 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8119 vec_oprnds3
= vec_defs
.pop ();
8120 vec_oprnds2
= vec_defs
.pop ();
8122 vec_oprnds1
= vec_defs
.pop ();
8123 vec_oprnds0
= vec_defs
.pop ();
8131 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8133 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8139 = vect_get_vec_def_for_operand (cond_expr0
,
8140 stmt
, comp_vectype
);
8141 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8144 = vect_get_vec_def_for_operand (cond_expr1
,
8145 stmt
, comp_vectype
);
8146 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8148 if (reduc_index
== 1)
8149 vec_then_clause
= reduc_def
;
8152 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8154 vect_is_simple_use (then_clause
, loop_vinfo
,
8157 if (reduc_index
== 2)
8158 vec_else_clause
= reduc_def
;
8161 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8163 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8170 = vect_get_vec_def_for_stmt_copy (dts
[0],
8171 vec_oprnds0
.pop ());
8174 = vect_get_vec_def_for_stmt_copy (dts
[1],
8175 vec_oprnds1
.pop ());
8177 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8178 vec_oprnds2
.pop ());
8179 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8180 vec_oprnds3
.pop ());
8185 vec_oprnds0
.quick_push (vec_cond_lhs
);
8187 vec_oprnds1
.quick_push (vec_cond_rhs
);
8188 vec_oprnds2
.quick_push (vec_then_clause
);
8189 vec_oprnds3
.quick_push (vec_else_clause
);
8192 /* Arguments are ready. Create the new vector stmt. */
8193 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8195 vec_then_clause
= vec_oprnds2
[i
];
8196 vec_else_clause
= vec_oprnds3
[i
];
8199 vec_compare
= vec_cond_lhs
;
8202 vec_cond_rhs
= vec_oprnds1
[i
];
8203 if (bitop1
== NOP_EXPR
)
8204 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8205 vec_cond_lhs
, vec_cond_rhs
);
8208 new_temp
= make_ssa_name (vec_cmp_type
);
8209 if (bitop1
== BIT_NOT_EXPR
)
8210 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8214 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8216 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8217 if (bitop2
== NOP_EXPR
)
8218 vec_compare
= new_temp
;
8219 else if (bitop2
== BIT_NOT_EXPR
)
8221 /* Instead of doing ~x ? y : z do x ? z : y. */
8222 vec_compare
= new_temp
;
8223 std::swap (vec_then_clause
, vec_else_clause
);
8227 vec_compare
= make_ssa_name (vec_cmp_type
);
8229 = gimple_build_assign (vec_compare
, bitop2
,
8230 vec_cond_lhs
, new_temp
);
8231 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8235 new_temp
= make_ssa_name (vec_dest
);
8236 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8237 vec_compare
, vec_then_clause
,
8239 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8241 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8248 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8250 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8252 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8255 vec_oprnds0
.release ();
8256 vec_oprnds1
.release ();
8257 vec_oprnds2
.release ();
8258 vec_oprnds3
.release ();
8263 /* vectorizable_comparison.
8265 Check if STMT is comparison expression that can be vectorized.
8266 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8267 comparison, put it in VEC_STMT, and insert it at GSI.
8269 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8272 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8273 gimple
**vec_stmt
, tree reduc_def
,
8276 tree lhs
, rhs1
, rhs2
;
8277 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8278 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8279 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8280 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8282 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8283 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8287 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8288 stmt_vec_info prev_stmt_info
= NULL
;
8290 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8291 vec
<tree
> vec_oprnds0
= vNULL
;
8292 vec
<tree
> vec_oprnds1
= vNULL
;
8297 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8300 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8303 mask_type
= vectype
;
8304 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8309 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8311 gcc_assert (ncopies
>= 1);
8312 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8313 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8317 if (STMT_VINFO_LIVE_P (stmt_info
))
8319 if (dump_enabled_p ())
8320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8321 "value used after loop.\n");
8325 if (!is_gimple_assign (stmt
))
8328 code
= gimple_assign_rhs_code (stmt
);
8330 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
8333 rhs1
= gimple_assign_rhs1 (stmt
);
8334 rhs2
= gimple_assign_rhs2 (stmt
);
8336 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
8337 &dts
[0], &vectype1
))
8340 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
8341 &dts
[1], &vectype2
))
8344 if (vectype1
&& vectype2
8345 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
8348 vectype
= vectype1
? vectype1
: vectype2
;
8350 /* Invariant comparison. */
8353 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
8354 if (TYPE_VECTOR_SUBPARTS (vectype
) != nunits
)
8357 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
8360 /* Can't compare mask and non-mask types. */
8361 if (vectype1
&& vectype2
8362 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
8365 /* Boolean values may have another representation in vectors
8366 and therefore we prefer bit operations over comparison for
8367 them (which also works for scalar masks). We store opcodes
8368 to use in bitop1 and bitop2. Statement is vectorized as
8369 BITOP2 (rhs1 BITOP1 rhs2) or
8370 rhs1 BITOP2 (BITOP1 rhs2)
8371 depending on bitop1 and bitop2 arity. */
8372 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
8374 if (code
== GT_EXPR
)
8376 bitop1
= BIT_NOT_EXPR
;
8377 bitop2
= BIT_AND_EXPR
;
8379 else if (code
== GE_EXPR
)
8381 bitop1
= BIT_NOT_EXPR
;
8382 bitop2
= BIT_IOR_EXPR
;
8384 else if (code
== LT_EXPR
)
8386 bitop1
= BIT_NOT_EXPR
;
8387 bitop2
= BIT_AND_EXPR
;
8388 std::swap (rhs1
, rhs2
);
8389 std::swap (dts
[0], dts
[1]);
8391 else if (code
== LE_EXPR
)
8393 bitop1
= BIT_NOT_EXPR
;
8394 bitop2
= BIT_IOR_EXPR
;
8395 std::swap (rhs1
, rhs2
);
8396 std::swap (dts
[0], dts
[1]);
8400 bitop1
= BIT_XOR_EXPR
;
8401 if (code
== EQ_EXPR
)
8402 bitop2
= BIT_NOT_EXPR
;
8408 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
8409 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
8410 dts
, ndts
, NULL
, NULL
);
8411 if (bitop1
== NOP_EXPR
)
8412 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
8415 machine_mode mode
= TYPE_MODE (vectype
);
8418 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
8419 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8422 if (bitop2
!= NOP_EXPR
)
8424 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
8425 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8435 vec_oprnds0
.create (1);
8436 vec_oprnds1
.create (1);
8440 lhs
= gimple_assign_lhs (stmt
);
8441 mask
= vect_create_destination_var (lhs
, mask_type
);
8443 /* Handle cmp expr. */
8444 for (j
= 0; j
< ncopies
; j
++)
8446 gassign
*new_stmt
= NULL
;
8451 auto_vec
<tree
, 2> ops
;
8452 auto_vec
<vec
<tree
>, 2> vec_defs
;
8454 ops
.safe_push (rhs1
);
8455 ops
.safe_push (rhs2
);
8456 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8457 vec_oprnds1
= vec_defs
.pop ();
8458 vec_oprnds0
= vec_defs
.pop ();
8462 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
8463 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
8468 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
8469 vec_oprnds0
.pop ());
8470 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
8471 vec_oprnds1
.pop ());
8476 vec_oprnds0
.quick_push (vec_rhs1
);
8477 vec_oprnds1
.quick_push (vec_rhs2
);
8480 /* Arguments are ready. Create the new vector stmt. */
8481 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
8483 vec_rhs2
= vec_oprnds1
[i
];
8485 new_temp
= make_ssa_name (mask
);
8486 if (bitop1
== NOP_EXPR
)
8488 new_stmt
= gimple_build_assign (new_temp
, code
,
8489 vec_rhs1
, vec_rhs2
);
8490 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8494 if (bitop1
== BIT_NOT_EXPR
)
8495 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
8497 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
8499 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8500 if (bitop2
!= NOP_EXPR
)
8502 tree res
= make_ssa_name (mask
);
8503 if (bitop2
== BIT_NOT_EXPR
)
8504 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
8506 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
8508 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8512 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8519 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8521 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8523 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8526 vec_oprnds0
.release ();
8527 vec_oprnds1
.release ();
8532 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8533 can handle all live statements in the node. Otherwise return true
8534 if STMT is not live or if vectorizable_live_operation can handle it.
8535 GSI and VEC_STMT are as for vectorizable_live_operation. */
8538 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8539 slp_tree slp_node
, gimple
**vec_stmt
)
8545 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
8547 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
8548 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
8549 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
8554 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
8555 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
8561 /* Make sure the statement is vectorizable. */
8564 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
8565 slp_instance node_instance
)
8567 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8568 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8569 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
8571 gimple
*pattern_stmt
;
8572 gimple_seq pattern_def_seq
;
8574 if (dump_enabled_p ())
8576 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
8577 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8580 if (gimple_has_volatile_ops (stmt
))
8582 if (dump_enabled_p ())
8583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8584 "not vectorized: stmt has volatile operands\n");
8589 /* Skip stmts that do not need to be vectorized. In loops this is expected
8591 - the COND_EXPR which is the loop exit condition
8592 - any LABEL_EXPRs in the loop
8593 - computations that are used only for array indexing or loop control.
8594 In basic blocks we only analyze statements that are a part of some SLP
8595 instance, therefore, all the statements are relevant.
8597 Pattern statement needs to be analyzed instead of the original statement
8598 if the original statement is not relevant. Otherwise, we analyze both
8599 statements. In basic blocks we are called from some SLP instance
8600 traversal, don't analyze pattern stmts instead, the pattern stmts
8601 already will be part of SLP instance. */
8603 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
8604 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
8605 && !STMT_VINFO_LIVE_P (stmt_info
))
8607 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8609 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8610 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8612 /* Analyze PATTERN_STMT instead of the original stmt. */
8613 stmt
= pattern_stmt
;
8614 stmt_info
= vinfo_for_stmt (pattern_stmt
);
8615 if (dump_enabled_p ())
8617 dump_printf_loc (MSG_NOTE
, vect_location
,
8618 "==> examining pattern statement: ");
8619 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8624 if (dump_enabled_p ())
8625 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
8630 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8633 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
8634 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
8636 /* Analyze PATTERN_STMT too. */
8637 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_NOTE
, vect_location
,
8640 "==> examining pattern statement: ");
8641 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8644 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
8649 if (is_pattern_stmt_p (stmt_info
)
8651 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
8653 gimple_stmt_iterator si
;
8655 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
8657 gimple
*pattern_def_stmt
= gsi_stmt (si
);
8658 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
8659 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
8661 /* Analyze def stmt of STMT if it's a pattern stmt. */
8662 if (dump_enabled_p ())
8664 dump_printf_loc (MSG_NOTE
, vect_location
,
8665 "==> examining pattern def statement: ");
8666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8669 if (!vect_analyze_stmt (pattern_def_stmt
,
8670 need_to_vectorize
, node
, node_instance
))
8676 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8678 case vect_internal_def
:
8681 case vect_reduction_def
:
8682 case vect_nested_cycle
:
8683 gcc_assert (!bb_vinfo
8684 && (relevance
== vect_used_in_outer
8685 || relevance
== vect_used_in_outer_by_reduction
8686 || relevance
== vect_used_by_reduction
8687 || relevance
== vect_unused_in_scope
8688 || relevance
== vect_used_only_live
));
8691 case vect_induction_def
:
8692 gcc_assert (!bb_vinfo
);
8695 case vect_constant_def
:
8696 case vect_external_def
:
8697 case vect_unknown_def_type
:
8702 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8704 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8705 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8706 || (is_gimple_call (stmt
)
8707 && gimple_call_lhs (stmt
) == NULL_TREE
));
8708 *need_to_vectorize
= true;
8711 if (PURE_SLP_STMT (stmt_info
) && !node
)
8713 dump_printf_loc (MSG_NOTE
, vect_location
,
8714 "handled only by SLP analysis\n");
8720 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8721 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8722 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8723 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8724 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8725 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8726 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8727 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8728 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8729 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8730 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
8731 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
8732 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8733 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8737 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8738 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8739 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8740 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8741 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8742 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8743 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8744 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8745 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8746 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8751 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8754 "not vectorized: relevant stmt not ");
8755 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8756 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8765 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8766 need extra handling, except for vectorizable reductions. */
8767 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8768 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
8770 if (dump_enabled_p ())
8772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8773 "not vectorized: live stmt not supported: ");
8774 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8784 /* Function vect_transform_stmt.
8786 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8789 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8790 bool *grouped_store
, slp_tree slp_node
,
8791 slp_instance slp_node_instance
)
8793 bool is_store
= false;
8794 gimple
*vec_stmt
= NULL
;
8795 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8798 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
8799 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8801 switch (STMT_VINFO_TYPE (stmt_info
))
8803 case type_demotion_vec_info_type
:
8804 case type_promotion_vec_info_type
:
8805 case type_conversion_vec_info_type
:
8806 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8810 case induc_vec_info_type
:
8811 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
8815 case shift_vec_info_type
:
8816 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8820 case op_vec_info_type
:
8821 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8825 case assignment_vec_info_type
:
8826 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8830 case load_vec_info_type
:
8831 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8836 case store_vec_info_type
:
8837 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8839 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8841 /* In case of interleaving, the whole chain is vectorized when the
8842 last store in the chain is reached. Store stmts before the last
8843 one are skipped, and there vec_stmt_info shouldn't be freed
8845 *grouped_store
= true;
8846 if (STMT_VINFO_VEC_STMT (stmt_info
))
8853 case condition_vec_info_type
:
8854 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8858 case comparison_vec_info_type
:
8859 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8863 case call_vec_info_type
:
8864 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8865 stmt
= gsi_stmt (*gsi
);
8866 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
8870 case call_simd_clone_vec_info_type
:
8871 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8872 stmt
= gsi_stmt (*gsi
);
8875 case reduc_vec_info_type
:
8876 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
8882 if (!STMT_VINFO_LIVE_P (stmt_info
))
8884 if (dump_enabled_p ())
8885 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8886 "stmt not supported.\n");
8891 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8892 This would break hybrid SLP vectorization. */
8894 gcc_assert (!vec_stmt
8895 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8897 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8898 is being vectorized, but outside the immediately enclosing loop. */
8900 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8901 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8902 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8903 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8904 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8905 || STMT_VINFO_RELEVANT (stmt_info
) ==
8906 vect_used_in_outer_by_reduction
))
8908 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8909 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8910 imm_use_iterator imm_iter
;
8911 use_operand_p use_p
;
8915 if (dump_enabled_p ())
8916 dump_printf_loc (MSG_NOTE
, vect_location
,
8917 "Record the vdef for outer-loop vectorization.\n");
8919 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8920 (to be used when vectorizing outer-loop stmts that use the DEF of
8922 if (gimple_code (stmt
) == GIMPLE_PHI
)
8923 scalar_dest
= PHI_RESULT (stmt
);
8925 scalar_dest
= gimple_assign_lhs (stmt
);
8927 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8929 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8931 exit_phi
= USE_STMT (use_p
);
8932 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8937 /* Handle stmts whose DEF is used outside the loop-nest that is
8938 being vectorized. */
8939 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8941 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
8946 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8952 /* Remove a group of stores (for SLP or interleaving), free their
8956 vect_remove_stores (gimple
*first_stmt
)
8958 gimple
*next
= first_stmt
;
8960 gimple_stmt_iterator next_si
;
8964 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8966 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8967 if (is_pattern_stmt_p (stmt_info
))
8968 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8969 /* Free the attached stmt_vec_info and remove the stmt. */
8970 next_si
= gsi_for_stmt (next
);
8971 unlink_stmt_vdef (next
);
8972 gsi_remove (&next_si
, true);
8973 release_defs (next
);
8974 free_stmt_vec_info (next
);
8980 /* Function new_stmt_vec_info.
8982 Create and initialize a new stmt_vec_info struct for STMT. */
8985 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8988 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8990 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8991 STMT_VINFO_STMT (res
) = stmt
;
8993 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8994 STMT_VINFO_LIVE_P (res
) = false;
8995 STMT_VINFO_VECTYPE (res
) = NULL
;
8996 STMT_VINFO_VEC_STMT (res
) = NULL
;
8997 STMT_VINFO_VECTORIZABLE (res
) = true;
8998 STMT_VINFO_IN_PATTERN_P (res
) = false;
8999 STMT_VINFO_RELATED_STMT (res
) = NULL
;
9000 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9001 STMT_VINFO_DATA_REF (res
) = NULL
;
9002 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9003 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9005 if (gimple_code (stmt
) == GIMPLE_PHI
9006 && is_loop_header_bb_p (gimple_bb (stmt
)))
9007 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9009 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9011 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9012 STMT_SLP_TYPE (res
) = loop_vect
;
9013 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9015 GROUP_FIRST_ELEMENT (res
) = NULL
;
9016 GROUP_NEXT_ELEMENT (res
) = NULL
;
9017 GROUP_SIZE (res
) = 0;
9018 GROUP_STORE_COUNT (res
) = 0;
9019 GROUP_GAP (res
) = 0;
9020 GROUP_SAME_DR_STMT (res
) = NULL
;
9026 /* Create a hash table for stmt_vec_info. */
9029 init_stmt_vec_info_vec (void)
9031 gcc_assert (!stmt_vec_info_vec
.exists ());
9032 stmt_vec_info_vec
.create (50);
9036 /* Free hash table for stmt_vec_info. */
9039 free_stmt_vec_info_vec (void)
9043 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9045 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9046 gcc_assert (stmt_vec_info_vec
.exists ());
9047 stmt_vec_info_vec
.release ();
9051 /* Free stmt vectorization related info. */
9054 free_stmt_vec_info (gimple
*stmt
)
9056 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9061 /* Check if this statement has a related "pattern stmt"
9062 (introduced by the vectorizer during the pattern recognition
9063 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9065 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9067 stmt_vec_info patt_info
9068 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9071 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9072 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9073 gimple_set_bb (patt_stmt
, NULL
);
9074 tree lhs
= gimple_get_lhs (patt_stmt
);
9075 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9076 release_ssa_name (lhs
);
9079 gimple_stmt_iterator si
;
9080 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9082 gimple
*seq_stmt
= gsi_stmt (si
);
9083 gimple_set_bb (seq_stmt
, NULL
);
9084 lhs
= gimple_get_lhs (seq_stmt
);
9085 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9086 release_ssa_name (lhs
);
9087 free_stmt_vec_info (seq_stmt
);
9090 free_stmt_vec_info (patt_stmt
);
9094 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9095 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9096 set_vinfo_for_stmt (stmt
, NULL
);
9101 /* Function get_vectype_for_scalar_type_and_size.
9103 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9107 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
9109 tree orig_scalar_type
= scalar_type
;
9110 scalar_mode inner_mode
;
9111 machine_mode simd_mode
;
9115 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9116 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9119 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9121 /* For vector types of elements whose mode precision doesn't
9122 match their types precision we use a element type of mode
9123 precision. The vectorization routines will have to make sure
9124 they support the proper result truncation/extension.
9125 We also make sure to build vector types with INTEGER_TYPE
9126 component type only. */
9127 if (INTEGRAL_TYPE_P (scalar_type
)
9128 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9129 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9130 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9131 TYPE_UNSIGNED (scalar_type
));
9133 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9134 When the component mode passes the above test simply use a type
9135 corresponding to that mode. The theory is that any use that
9136 would cause problems with this will disable vectorization anyway. */
9137 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9138 && !INTEGRAL_TYPE_P (scalar_type
))
9139 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9141 /* We can't build a vector type of elements with alignment bigger than
9143 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9144 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9145 TYPE_UNSIGNED (scalar_type
));
9147 /* If we felt back to using the mode fail if there was
9148 no scalar type for it. */
9149 if (scalar_type
== NULL_TREE
)
9152 /* If no size was supplied use the mode the target prefers. Otherwise
9153 lookup a vector mode of the specified size. */
9155 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9156 else if (!mode_for_vector (inner_mode
, size
/ nbytes
).exists (&simd_mode
))
9158 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
9159 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9163 vectype
= build_vector_type (scalar_type
, nunits
);
9165 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9166 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9169 /* Re-attach the address-space qualifier if we canonicalized the scalar
9171 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9172 return build_qualified_type
9173 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9178 unsigned int current_vector_size
;
9180 /* Function get_vectype_for_scalar_type.
9182 Returns the vector type corresponding to SCALAR_TYPE as supported
9186 get_vectype_for_scalar_type (tree scalar_type
)
9189 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9190 current_vector_size
);
9192 && current_vector_size
== 0)
9193 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9197 /* Function get_mask_type_for_scalar_type.
9199 Returns the mask type corresponding to a result of comparison
9200 of vectors of specified SCALAR_TYPE as supported by target. */
9203 get_mask_type_for_scalar_type (tree scalar_type
)
9205 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9210 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9211 current_vector_size
);
9214 /* Function get_same_sized_vectype
9216 Returns a vector type corresponding to SCALAR_TYPE of size
9217 VECTOR_TYPE if supported by the target. */
9220 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9222 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9223 return build_same_sized_truth_vector_type (vector_type
);
9225 return get_vectype_for_scalar_type_and_size
9226 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9229 /* Function vect_is_simple_use.
9232 VINFO - the vect info of the loop or basic block that is being vectorized.
9233 OPERAND - operand in the loop or bb.
9235 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9236 DT - the type of definition
9238 Returns whether a stmt with OPERAND can be vectorized.
9239 For loops, supportable operands are constants, loop invariants, and operands
9240 that are defined by the current iteration of the loop. Unsupportable
9241 operands are those that are defined by a previous iteration of the loop (as
9242 is the case in reduction/induction computations).
9243 For basic blocks, supportable operands are constants and bb invariants.
9244 For now, operands defined outside the basic block are not supported. */
9247 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9248 gimple
**def_stmt
, enum vect_def_type
*dt
)
9251 *dt
= vect_unknown_def_type
;
9253 if (dump_enabled_p ())
9255 dump_printf_loc (MSG_NOTE
, vect_location
,
9256 "vect_is_simple_use: operand ");
9257 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9258 dump_printf (MSG_NOTE
, "\n");
9261 if (CONSTANT_CLASS_P (operand
))
9263 *dt
= vect_constant_def
;
9267 if (is_gimple_min_invariant (operand
))
9269 *dt
= vect_external_def
;
9273 if (TREE_CODE (operand
) != SSA_NAME
)
9275 if (dump_enabled_p ())
9276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9281 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9283 *dt
= vect_external_def
;
9287 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9288 if (dump_enabled_p ())
9290 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9291 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
9294 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9295 *dt
= vect_external_def
;
9298 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9299 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9302 if (dump_enabled_p ())
9304 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9307 case vect_uninitialized_def
:
9308 dump_printf (MSG_NOTE
, "uninitialized\n");
9310 case vect_constant_def
:
9311 dump_printf (MSG_NOTE
, "constant\n");
9313 case vect_external_def
:
9314 dump_printf (MSG_NOTE
, "external\n");
9316 case vect_internal_def
:
9317 dump_printf (MSG_NOTE
, "internal\n");
9319 case vect_induction_def
:
9320 dump_printf (MSG_NOTE
, "induction\n");
9322 case vect_reduction_def
:
9323 dump_printf (MSG_NOTE
, "reduction\n");
9325 case vect_double_reduction_def
:
9326 dump_printf (MSG_NOTE
, "double reduction\n");
9328 case vect_nested_cycle
:
9329 dump_printf (MSG_NOTE
, "nested cycle\n");
9331 case vect_unknown_def_type
:
9332 dump_printf (MSG_NOTE
, "unknown\n");
9337 if (*dt
== vect_unknown_def_type
)
9339 if (dump_enabled_p ())
9340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9341 "Unsupported pattern.\n");
9345 switch (gimple_code (*def_stmt
))
9352 if (dump_enabled_p ())
9353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9354 "unsupported defining stmt:\n");
9361 /* Function vect_is_simple_use.
9363 Same as vect_is_simple_use but also determines the vector operand
9364 type of OPERAND and stores it to *VECTYPE. If the definition of
9365 OPERAND is vect_uninitialized_def, vect_constant_def or
9366 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9367 is responsible to compute the best suited vector type for the
9371 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9372 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
9374 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
9377 /* Now get a vector type if the def is internal, otherwise supply
9378 NULL_TREE and leave it up to the caller to figure out a proper
9379 type for the use stmt. */
9380 if (*dt
== vect_internal_def
9381 || *dt
== vect_induction_def
9382 || *dt
== vect_reduction_def
9383 || *dt
== vect_double_reduction_def
9384 || *dt
== vect_nested_cycle
)
9386 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
9388 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9389 && !STMT_VINFO_RELEVANT (stmt_info
)
9390 && !STMT_VINFO_LIVE_P (stmt_info
))
9391 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9393 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9394 gcc_assert (*vectype
!= NULL_TREE
);
9396 else if (*dt
== vect_uninitialized_def
9397 || *dt
== vect_constant_def
9398 || *dt
== vect_external_def
)
9399 *vectype
= NULL_TREE
;
/* NOTE(review): the text of this function is extraction-damaged — statements
   are split across lines, original file line numbers are fused into the
   code, and several structural lines (braces, break/return statements, the
   switch header) are missing.  The annotations below describe the visible
   logic only; the block is not compilable as-is and needs restoration from
   the upstream source before any behavioral change.  */
9407 /* Function supportable_widening_operation
9409 Check whether an operation represented by the code CODE is a
9410 widening operation that is supported by the target platform in
9411 vector form (i.e., when operating on arguments of type VECTYPE_IN
9412 producing a result of type VECTYPE_OUT).
9414 Widening operations we currently support are NOP (CONVERT), FLOAT
9415 and WIDEN_MULT. This function checks if these operations are supported
9416 by the target platform either directly (via vector tree-codes), or via
9420 - CODE1 and CODE2 are codes of vector operations to be used when
9421 vectorizing the operation, if available.
9422 - MULTI_STEP_CVT determines the number of required intermediate steps in
9423 case of multi-step conversion (like char->short->int - in that case
9424 MULTI_STEP_CVT will be 1).
9425 - INTERM_TYPES contains the intermediate type required to perform the
9426 widening operation (short in the above example). */
9429 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
9430 tree vectype_out
, tree vectype_in
,
9431 enum tree_code
*code1
, enum tree_code
*code2
,
9432 int *multi_step_cvt
,
9433 vec
<tree
> *interm_types
)
/* Local working state: VECTYPE/WIDE_VECTYPE alias the narrow input and
   wide output vector types; C1/C2 receive the candidate vector tree
   codes; OPTAB1..4 and ICODE1/2 hold the optabs and insn codes probed
   against the target.  */
9435 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9436 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9437 struct loop
*vect_loop
= NULL
;
9438 machine_mode vec_mode
;
9439 enum insn_code icode1
, icode2
;
9440 optab optab1
, optab2
;
9441 tree vectype
= vectype_in
;
9442 tree wide_vectype
= vectype_out
;
9443 enum tree_code c1
, c2
;
9445 tree prev_type
, intermediate_type
;
9446 machine_mode intermediate_mode
, prev_mode
;
9447 optab optab3
, optab4
;
9449 *multi_step_cvt
= 0;
/* NOTE(review): presumably guarded by "if (loop_info)" in the original —
   the guard line is lost in extraction; confirm before relying on it.  */
9451 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
9455 case WIDEN_MULT_EXPR
:
9456 /* The result of a vectorized widening operation usually requires
9457 two vectors (because the widened results do not fit into one vector).
9458 The generated vector results would normally be expected to be
9459 generated in the same order as in the original scalar computation,
9460 i.e. if 8 results are generated in each vector iteration, they are
9461 to be organized as follows:
9462 vect1: [res1,res2,res3,res4],
9463 vect2: [res5,res6,res7,res8].
9465 However, in the special case that the result of the widening
9466 operation is used in a reduction computation only, the order doesn't
9467 matter (because when vectorizing a reduction we change the order of
9468 the computation). Some targets can take advantage of this and
9469 generate more efficient code. For example, targets like Altivec,
9470 that support widen_mult using a sequence of {mult_even,mult_odd}
9471 generate the following vectors:
9472 vect1: [res1,res3,res5,res7],
9473 vect2: [res2,res4,res6,res8].
9475 When vectorizing outer-loops, we execute the inner-loop sequentially
9476 (each vectorized inner-loop iteration contributes to VF outer-loop
9477 iterations in parallel). We therefore don't allow to change the
9478 order of the computation in the inner-loop during outer-loop
9480 /* TODO: Another case in which order doesn't *really* matter is when we
9481 widen and then contract again, e.g. (short)((int)x * y >> 8).
9482 Normally, pack_trunc performs an even/odd permute, whereas the
9483 repack from an even/odd expansion would be an interleave, which
9484 would be significantly simpler for e.g. AVX2. */
9485 /* In any case, in order to avoid duplicating the code below, recurse
9486 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9487 are properly set up for the caller. If we fail, we'll continue with
9488 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9490 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
9491 && !nested_in_vect_loop_p (vect_loop
, stmt
)
9492 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
9493 stmt
, vectype_out
, vectype_in
,
9494 code1
, code2
, multi_step_cvt
,
9497 /* Elements in a vector with vect_used_by_reduction property cannot
9498 be reordered if the use chain with this property does not have the
9499 same operation. One such an example is s += a * b, where elements
9500 in a and b cannot be reordered. Here we check if the vector defined
9501 by STMT is only directly used in the reduction statement. */
9502 tree lhs
= gimple_assign_lhs (stmt
);
9503 use_operand_p dummy
;
9505 stmt_vec_info use_stmt_info
= NULL
;
9506 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
9507 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
9508 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
/* Fallback when the even/odd recursion does not apply: try the
   ordered LO/HI widening-multiply pair.  */
9511 c1
= VEC_WIDEN_MULT_LO_EXPR
;
9512 c2
= VEC_WIDEN_MULT_HI_EXPR
;
9525 case VEC_WIDEN_MULT_EVEN_EXPR
:
9526 /* Support the recursion induced just above. */
9527 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
9528 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
9531 case WIDEN_LSHIFT_EXPR
:
9532 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
9533 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
/* NOTE(review): the case labels for the conversion (CASE_CONVERT) and
   FLOAT_EXPR arms below were lost in extraction; only their bodies
   setting the UNPACK LO/HI and UNPACK FLOAT LO/HI codes remain.  */
9537 c1
= VEC_UNPACK_LO_EXPR
;
9538 c2
= VEC_UNPACK_HI_EXPR
;
9542 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
9543 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
9546 case FIX_TRUNC_EXPR
:
9547 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9548 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9549 computing the operation. */
/* On big-endian targets the LO/HI pair presumably gets swapped here
   (the even/odd variant is lane-order independent and is excluded);
   NOTE(review): the swap statement itself is lost in extraction.  */
9556 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
9559 if (code
== FIX_TRUNC_EXPR
)
9561 /* The signedness is determined from output operand. */
9562 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9563 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
9567 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9568 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
9571 if (!optab1
|| !optab2
)
/* Probe the target: both optabs must have handlers for the narrow
   vector mode.  */
9574 vec_mode
= TYPE_MODE (vectype
);
9575 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
9576 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step case: both insns already produce the wide vector mode.  */
9582 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9583 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9584 /* For scalar masks we may have different boolean
9585 vector types having the same QImode. Thus we
9586 add additional check for elements number. */
9587 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9588 || (TYPE_VECTOR_SUBPARTS (vectype
) / 2
9589 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9591 /* Check if it's a multi-step conversion that can be done using intermediate
9594 prev_type
= vectype
;
9595 prev_mode
= vec_mode
;
9597 if (!CONVERT_EXPR_CODE_P (code
))
9600 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9601 intermediate steps in promotion sequence. We try
9602 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
/* Multi-step case: walk up to MAX_INTERM_CVT_STEPS intermediate vector
   types, recording each step in INTERM_TYPES and counting it in
   *MULTI_STEP_CVT.  */
9604 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9605 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9607 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9608 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9611 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) / 2,
9612 current_vector_size
);
9613 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9618 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
9619 TYPE_UNSIGNED (prev_type
));
9621 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9622 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Every link of the chain must be supported: the previous mode must
   widen into the intermediate mode, and the intermediate mode must
   itself have LO/HI handlers; otherwise give up on this chain.  */
9624 if (!optab3
|| !optab4
9625 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
9626 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9627 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
9628 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
9629 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
9630 == CODE_FOR_nothing
)
9631 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
9632 == CODE_FOR_nothing
))
9635 interm_types
->quick_push (intermediate_type
);
9636 (*multi_step_cvt
)++;
/* Success once the current step already lands in the wide vector
   mode (with the extra element-count check for boolean vectors).  */
9638 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
9639 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9640 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9641 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) / 2
9642 == TYPE_VECTOR_SUBPARTS (wide_vectype
)));
9644 prev_type
= intermediate_type
;
9645 prev_mode
= intermediate_mode
;
/* No complete chain was found: discard any partially recorded
   intermediate types before failing.  */
9648 interm_types
->release ();
9653 /* Function supportable_narrowing_operation
9655 Check whether an operation represented by the code CODE is a
9656 narrowing operation that is supported by the target platform in
9657 vector form (i.e., when operating on arguments of type VECTYPE_IN
9658 and producing a result of type VECTYPE_OUT).
9660 Narrowing operations we currently support are NOP (CONVERT) and
9661 FIX_TRUNC. This function checks if these operations are supported by
9662 the target platform directly via vector tree-codes.
9665 - CODE1 is the code of a vector operation to be used when
9666 vectorizing the operation, if available.
9667 - MULTI_STEP_CVT determines the number of required intermediate steps in
9668 case of multi-step conversion (like int->short->char - in that case
9669 MULTI_STEP_CVT will be 1).
9670 - INTERM_TYPES contains the intermediate type required to perform the
9671 narrowing operation (short in the above example). */
9674 supportable_narrowing_operation (enum tree_code code
,
9675 tree vectype_out
, tree vectype_in
,
9676 enum tree_code
*code1
, int *multi_step_cvt
,
9677 vec
<tree
> *interm_types
)
9679 machine_mode vec_mode
;
9680 enum insn_code icode1
;
9681 optab optab1
, interm_optab
;
9682 tree vectype
= vectype_in
;
9683 tree narrow_vectype
= vectype_out
;
9685 tree intermediate_type
, prev_type
;
9686 machine_mode intermediate_mode
, prev_mode
;
9690 *multi_step_cvt
= 0;
9694 c1
= VEC_PACK_TRUNC_EXPR
;
9697 case FIX_TRUNC_EXPR
:
9698 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9702 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9703 tree code and optabs used for computing the operation. */
9710 if (code
== FIX_TRUNC_EXPR
)
9711 /* The signedness is determined from output operand. */
9712 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9714 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9719 vec_mode
= TYPE_MODE (vectype
);
9720 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9725 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9726 /* For scalar masks we may have different boolean
9727 vector types having the same QImode. Thus we
9728 add additional check for elements number. */
9729 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9730 || (TYPE_VECTOR_SUBPARTS (vectype
) * 2
9731 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9733 /* Check if it's a multi-step conversion that can be done using intermediate
9735 prev_mode
= vec_mode
;
9736 prev_type
= vectype
;
9737 if (code
== FIX_TRUNC_EXPR
)
9738 uns
= TYPE_UNSIGNED (vectype_out
);
9740 uns
= TYPE_UNSIGNED (vectype
);
9742 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9743 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9744 costly than signed. */
9745 if (code
== FIX_TRUNC_EXPR
&& uns
)
9747 enum insn_code icode2
;
9750 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9752 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9753 if (interm_optab
!= unknown_optab
9754 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9755 && insn_data
[icode1
].operand
[0].mode
9756 == insn_data
[icode2
].operand
[0].mode
)
9759 optab1
= interm_optab
;
9764 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9765 intermediate steps in promotion sequence. We try
9766 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9767 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9768 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9770 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9771 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
9774 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type
) * 2,
9775 current_vector_size
);
9776 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
9781 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9783 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9786 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9787 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9788 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9789 == CODE_FOR_nothing
))
9792 interm_types
->quick_push (intermediate_type
);
9793 (*multi_step_cvt
)++;
9795 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9796 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
9797 || (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2
9798 == TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
9800 prev_mode
= intermediate_mode
;
9801 prev_type
= intermediate_type
;
9802 optab1
= interm_optab
;
9805 interm_types
->release ();