1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
/* NOTE(review): this excerpt is a line-sampled listing -- statements are
   split across physical lines and some original lines (return type, braces)
   are missing.  Accessor: returns STMT_VINFO_VECTYPE of STMT_INFO.
   Return type is not visible here -- presumably `tree`; confirm upstream.  */
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
64 return STMT_VINFO_VECTYPE (stmt_info
)
;
/* NOTE(review): sampled listing -- original lines 75-79 are missing between
   the loop_vinfo lookup and its use; presumably a `!loop_vinfo` early-return
   guard lived there (TODO confirm against upstream before relying on it).
   Visible logic: fetch the stmt's bb and the vectorized loop, then report
   whether the bb's loop_father is the vectorized loop's inner loop.  */
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
82 return (bb
->loop_father
== loop
->inner
);
/* NOTE(review): sampled listing.  Visible behavior: promote (unaligned_)load
   /store kinds to gather/scatter kinds when the stmt is a gather/scatter;
   then either queue the cost into BODY_COST_VEC and return a builtin
   estimate, or forward to add_stmt_cost on the target cost data.  The
   `if (body_cost_vec)` branch structure and part of the `si` initializer
   (original lines 93, 100-101, 105, 107, 109-110) are missing from this
   excerpt -- do not edit the control flow without the full source.  */
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
94 if ((kind
== vector_load
|| kind
== unaligned_load
)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
96 kind
= vector_gather_load
;
97 if ((kind
== vector_store
|| kind
== unaligned_store
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_scatter_store
;
102 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
103 stmt_info_for_cost si
= { count
, kind
,
104 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
106 body_cost_vec
->safe_push (si
);
108 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
111 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
112 count
, kind
, stmt_info
, misalign
, where
);
/* NOTE(review): sampled listing -- the trailing argument of create_tmp_var
   (presumably the "vect_array" name string) and the closing of the call are
   missing here.  Builds an array type of NELEMS ELEM_TYPEs and returns a
   temporary variable of that type.  */
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
118 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
120 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
/* NOTE(review): sampled listing.  Visible flow: assert ARRAY really has
   ARRAY_TYPE, build an ARRAY_REF at index N, assign it into a fresh
   destination var, give the assignment a new SSA name as its lhs, and emit
   the statement via vect_finish_stmt_generation.  The trailing
   `return vect_name;` (original line 147) is missing from this excerpt.  */
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
130 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
131 tree array
, unsigned HOST_WIDE_INT n
)
133 tree vect_type
, vect
, vect_name
, array_ref
;
136 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
137 vect_type
= TREE_TYPE (TREE_TYPE (array
));
138 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
139 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
140 build_int_cst (size_type_node
, n
),
141 NULL_TREE
, NULL_TREE
);
143 new_stmt
= gimple_build_assign (vect
, array_ref
);
144 vect_name
= make_ssa_name (vect
, new_stmt
);
145 gimple_assign_set_lhs (new_stmt
, vect_name
);
146 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): sampled listing.  Mirror of read_vector_array: builds an
   ARRAY_REF at index N typed as TREE_TYPE (vect), assigns VECT into it, and
   emits the store before *GSI.  Local declarations (original lines 158-161)
   are missing from this excerpt.  */
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
156 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
157 tree array
, unsigned HOST_WIDE_INT n
)
162 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
163 build_int_cst (size_type_node
, n
),
164 NULL_TREE
, NULL_TREE
);
166 new_stmt
= gimple_build_assign (array_ref
, vect
);
167 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): sampled listing.  Builds a MEM_REF of TYPE at PTR with a
   zero offset in ALIAS_PTR_TYPE's alias set, and records the pointer's
   alignment as the type's alignment.  The local declaration of mem_ref and
   the trailing `return mem_ref;` are missing from this excerpt.  */
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
175 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
179 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
/* NOTE(review): sampled listing.  Builds a clobber of VAR's type, wraps it
   in an assignment to VAR, and emits it before *GSI via
   vect_finish_stmt_generation.  Return type and braces are not visible.  */
185 /* Add a clobber of variable VAR to the vectorization of STMT.
186 Emit the clobber before *GSI. */
189 vect_clobber_variable (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): sampled listing.  Visible flow: save the stmt's current
   relevant/live flags; if the stmt is the original of a pattern, redirect to
   the related pattern stmt (asserting the back-link) and re-save its flags;
   OR in live_p and max in relevant; if nothing changed, only dump (the
   early `return` that presumably follows, original line ~252, is missing);
   otherwise push the stmt onto the worklist.  Several braces/returns fell
   out of this excerpt -- do not infer control flow from indentation.  */
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
204 enum vect_relevant relevant
, bool live_p
)
206 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
207 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
208 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 gimple
*pattern_stmt
;
211 if (dump_enabled_p ())
213 dump_printf_loc (MSG_NOTE
, vect_location
,
214 "mark relevant %d, live %d: ", relevant
, live_p
);
215 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
218 /* If this stmt is an original stmt in a pattern, we might need to mark its
219 related pattern stmt instead of the original stmt. However, such stmts
220 may have their own uses that are not in any pattern, in such cases the
221 stmt itself should be marked. */
222 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
224 /* This is the last stmt in a sequence that was detected as a
225 pattern that can potentially be vectorized. Don't mark the stmt
226 as relevant/live because it's not going to be vectorized.
227 Instead mark the pattern-stmt that replaces it. */
229 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 if (dump_enabled_p ())
232 dump_printf_loc (MSG_NOTE
, vect_location
,
233 "last stmt in pattern. don't mark"
234 " relevant/live.\n");
235 stmt_info
= vinfo_for_stmt (pattern_stmt
);
236 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
237 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
238 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
242 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
243 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
244 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
246 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
247 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
249 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE
, vect_location
,
251 "already marked relevant/live.\n");
255 worklist
->safe_push (stmt
);
/* NOTE(review): sampled listing.  Visible flow: reject non-assignments,
   then for each SSA use operand require vect_is_simple_use to succeed and
   the def-type to be external or constant (i.e. loop-invariant).  The
   `return false;`/`return true;` statements and some declarations (op,
   iter, def_stmt) are missing from this excerpt.  */
259 /* Function is_simple_and_all_uses_invariant
261 Return true if STMT is simple and all uses of it are invariant. */
264 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
270 if (!is_gimple_assign (stmt
))
273 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
275 enum vect_def_type dt
= vect_uninitialized_def
;
277 if (!vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
))
279 if (dump_enabled_p ())
280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
281 "use not simple.\n");
285 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
/* NOTE(review): sampled listing.  Visible flow: default *relevant to
   vect_unused_in_scope (the `*live_p = false;` that presumably accompanies
   it is among the missing lines); mark control stmts other than the loop
   exit cond and non-PHI stmts with real vdefs as vect_used_in_scope; scan
   immediate uses of each def for uses outside the loop (setting *live_p,
   missing here) while skipping debug uses and asserting loop-closed-SSA
   form; finally demote live-but-unused stmts whose uses are all invariant
   to vect_used_only_live.  Several braces and assignments fell out of this
   excerpt.  */
291 /* Function vect_stmt_relevant_p.
293 Return true if STMT in loop that is represented by LOOP_VINFO is
294 "relevant for vectorization".
296 A stmt is considered "relevant for vectorization" if:
297 - it has uses outside the loop.
298 - it has vdefs (it alters memory).
299 - control stmts in the loop (except for the exit condition).
301 CHECKME: what other side effects would the vectorizer allow? */
304 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
305 enum vect_relevant
*relevant
, bool *live_p
)
307 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
309 imm_use_iterator imm_iter
;
313 *relevant
= vect_unused_in_scope
;
316 /* cond stmt other than loop exit cond. */
317 if (is_ctrl_stmt (stmt
)
318 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
319 != loop_exit_ctrl_vec_info_type
)
320 *relevant
= vect_used_in_scope
;
322 /* changing memory. */
323 if (gimple_code (stmt
) != GIMPLE_PHI
)
324 if (gimple_vdef (stmt
)
325 && !gimple_clobber_p (stmt
))
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE
, vect_location
,
329 "vec_stmt_relevant_p: stmt has vdefs.\n");
330 *relevant
= vect_used_in_scope
;
333 /* uses outside the loop. */
334 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
336 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
338 basic_block bb
= gimple_bb (USE_STMT (use_p
));
339 if (!flow_bb_inside_loop_p (loop
, bb
))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE
, vect_location
,
343 "vec_stmt_relevant_p: used out of loop.\n");
345 if (is_gimple_debug (USE_STMT (use_p
)))
348 /* We expect all such uses to be in the loop exit phis
349 (because of loop closed form) */
350 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
351 gcc_assert (bb
== single_exit (loop
)->dest
);
358 if (*live_p
&& *relevant
== vect_unused_in_scope
359 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_NOTE
, vect_location
,
363 "vec_stmt_relevant_p: stmt live but not relevant.\n");
364 *relevant
= vect_used_only_live
;
367 return (*live_p
|| *relevant
);
/* NOTE(review): sampled listing.  Visible flow: a stmt without a data ref
   trivially qualifies; internal calls special-case the mask argument, the
   stored-value argument, and (for gather/scatter internal fns) argument 1;
   for plain copies, USE qualifies unless it is exactly the RHS SSA name.
   The `return true;`/`return false;` statements, the `if (operand == use)`
   comparison, and the mask_index >= 0 guard are among the missing lines --
   control flow cannot be reconstructed from this excerpt alone.  */
371 /* Function exist_non_indexing_operands_for_use_p
373 USE is one of the uses attached to STMT. Check if USE is
374 used in STMT for anything other than indexing an array. */
377 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
380 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
382 /* USE corresponds to some operand in STMT. If there is no data
383 reference in STMT, then any operand that corresponds to USE
384 is not indexing an array. */
385 if (!STMT_VINFO_DATA_REF (stmt_info
))
388 /* STMT has a data_ref. FORNOW this means that its of one of
392 (This should have been verified in analyze_data_refs).
394 'var' in the second case corresponds to a def, not a use,
395 so USE cannot correspond to any operands that are not used
398 Therefore, all we need to check is if STMT falls into the
399 first case, and whether var corresponds to USE. */
401 if (!gimple_assign_copy_p (stmt
))
403 if (is_gimple_call (stmt
)
404 && gimple_call_internal_p (stmt
))
406 internal_fn ifn
= gimple_call_internal_fn (stmt
);
407 int mask_index
= internal_fn_mask_index (ifn
);
409 && use
== gimple_call_arg (stmt
, mask_index
))
411 int stored_value_index
= internal_fn_stored_value_index (ifn
);
412 if (stored_value_index
>= 0
413 && use
== gimple_call_arg (stmt
, stored_value_index
))
415 if (internal_gather_scatter_fn_p (ifn
)
416 && use
== gimple_call_arg (stmt
, 1))
422 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
424 operand
= gimple_assign_rhs1 (stmt
);
425 if (TREE_CODE (operand
) != SSA_NAME
)
/* NOTE(review): sampled listing.  Visible flow of process_use: (1) skip
   address-computation-only uses unless FORCE; (2) require a simple use;
   ignore defs that are nops or outside the loop; (3) for a reduction phi
   fed by a same-nest reduction stmt, just assert the expected marking and
   return; (4) adjust RELEVANT across nest boundaries (case 3a outer->inner,
   case 3b inner->outer) via two switch statements whose enclosing
   `switch (relevant)` headers, `break`s, `gcc_unreachable ()` arms, and
   closing braces are all among the missing lines; (5) skip induction values
   on loop PHI backedges; (6) finally mark DEF_STMT relevant (live_p=false).
   The FORCE parameter and the `bool force` declaration are missing from the
   visible signature.  Do not edit control flow from this excerpt.  */
436 Function process_use.
439 - a USE in STMT in a loop represented by LOOP_VINFO
440 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
441 that defined USE. This is done by calling mark_relevant and passing it
442 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
443 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
447 Generally, LIVE_P and RELEVANT are used to define the liveness and
448 relevance info of the DEF_STMT of this USE:
449 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
450 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
452 - case 1: If USE is used only for address computations (e.g. array indexing),
453 which does not need to be directly vectorized, then the liveness/relevance
454 of the respective DEF_STMT is left unchanged.
455 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
456 skip DEF_STMT cause it had already been processed.
457 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
458 be modified accordingly.
460 Return true if everything is as expected. Return false otherwise. */
463 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
464 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
467 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
468 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
469 stmt_vec_info dstmt_vinfo
;
470 basic_block bb
, def_bb
;
472 enum vect_def_type dt
;
474 /* case 1: we are only interested in uses that need to be vectorized. Uses
475 that are used for address computation are not considered relevant. */
476 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
479 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
483 "not vectorized: unsupported use in stmt.\n");
487 if (!def_stmt
|| gimple_nop_p (def_stmt
))
490 def_bb
= gimple_bb (def_stmt
);
491 if (!flow_bb_inside_loop_p (loop
, def_bb
))
493 if (dump_enabled_p ())
494 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
498 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
499 DEF_STMT must have already been processed, because this should be the
500 only way that STMT, which is a reduction-phi, was put in the worklist,
501 as there should be no other uses for DEF_STMT in the loop. So we just
502 check that everything is as expected, and we are done. */
503 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
504 bb
= gimple_bb (stmt
);
505 if (gimple_code (stmt
) == GIMPLE_PHI
506 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
507 && gimple_code (def_stmt
) != GIMPLE_PHI
508 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
509 && bb
->loop_father
== def_bb
->loop_father
)
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE
, vect_location
,
513 "reduc-stmt defining reduc-phi in the same nest.\n");
514 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
515 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
516 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
517 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
518 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
522 /* case 3a: outer-loop stmt defining an inner-loop stmt:
523 outer-loop-header-bb:
529 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
531 if (dump_enabled_p ())
532 dump_printf_loc (MSG_NOTE
, vect_location
,
533 "outer-loop def-stmt defining inner-loop stmt.\n");
537 case vect_unused_in_scope
:
538 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
539 vect_used_in_scope
: vect_unused_in_scope
;
542 case vect_used_in_outer_by_reduction
:
543 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
544 relevant
= vect_used_by_reduction
;
547 case vect_used_in_outer
:
548 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
549 relevant
= vect_used_in_scope
;
552 case vect_used_in_scope
:
560 /* case 3b: inner-loop stmt defining an outer-loop stmt:
561 outer-loop-header-bb:
565 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
567 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
569 if (dump_enabled_p ())
570 dump_printf_loc (MSG_NOTE
, vect_location
,
571 "inner-loop def-stmt defining outer-loop stmt.\n");
575 case vect_unused_in_scope
:
576 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
577 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
578 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
581 case vect_used_by_reduction
:
582 case vect_used_only_live
:
583 relevant
= vect_used_in_outer_by_reduction
;
586 case vect_used_in_scope
:
587 relevant
= vect_used_in_outer
;
594 /* We are also not interested in uses on loop PHI backedges that are
595 inductions. Otherwise we'll needlessly vectorize the IV increment
596 and cause hybrid SLP for SLP inductions. Unless the PHI is live
598 else if (gimple_code (stmt
) == GIMPLE_PHI
599 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
600 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
601 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
604 if (dump_enabled_p ())
605 dump_printf_loc (MSG_NOTE
, vect_location
,
606 "induction value on backedge.\n");
611 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
/* NOTE(review): sampled listing.  Visible flow: phase 1 seeds the worklist
   with every relevant phi and stmt in the loop body; phase 2 pops stmts,
   validates/adjusts `relevant` per def-type (reduction, nested cycle,
   double reduction -- the `return false;` bodies of the failure arms and
   the surrounding braces are among the missing lines), then calls
   process_use on each operand: RHS/call-arg scan for pattern stmts,
   FOR_EACH_PHI_OR_STMT_USE otherwise, plus the gather/scatter offset.
   Error-path `return false;` statements, loop-body braces, the final
   `return true;`, and several declarations (i, bb, phi, stmt, live_p,
   use_p, iter) are missing from this excerpt.  */
616 /* Function vect_mark_stmts_to_be_vectorized.
618 Not all stmts in the loop need to be vectorized. For example:
627 Stmt 1 and 3 do not need to be vectorized, because loop control and
628 addressing of vectorized data-refs are handled differently.
630 This pass detects such stmts. */
633 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
635 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
636 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
637 unsigned int nbbs
= loop
->num_nodes
;
638 gimple_stmt_iterator si
;
641 stmt_vec_info stmt_vinfo
;
645 enum vect_relevant relevant
;
647 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE
, vect_location
,
649 "=== vect_mark_stmts_to_be_vectorized ===\n");
651 auto_vec
<gimple
*, 64> worklist
;
653 /* 1. Init worklist. */
654 for (i
= 0; i
< nbbs
; i
++)
657 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
663 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
666 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
667 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
669 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
671 stmt
= gsi_stmt (si
);
672 if (dump_enabled_p ())
674 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
675 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
678 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
679 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
683 /* 2. Process_worklist */
684 while (worklist
.length () > 0)
689 stmt
= worklist
.pop ();
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
693 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
696 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
697 (DEF_STMT) as relevant/irrelevant according to the relevance property
699 stmt_vinfo
= vinfo_for_stmt (stmt
);
700 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
702 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
703 propagated as is to the DEF_STMTs of its USEs.
705 One exception is when STMT has been identified as defining a reduction
706 variable; in this case we set the relevance to vect_used_by_reduction.
707 This is because we distinguish between two kinds of relevant stmts -
708 those that are used by a reduction computation, and those that are
709 (also) used by a regular computation. This allows us later on to
710 identify stmts that are used solely by a reduction, and therefore the
711 order of the results that they produce does not have to be kept. */
713 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
715 case vect_reduction_def
:
716 gcc_assert (relevant
!= vect_unused_in_scope
);
717 if (relevant
!= vect_unused_in_scope
718 && relevant
!= vect_used_in_scope
719 && relevant
!= vect_used_by_reduction
720 && relevant
!= vect_used_only_live
)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
724 "unsupported use of reduction.\n");
729 case vect_nested_cycle
:
730 if (relevant
!= vect_unused_in_scope
731 && relevant
!= vect_used_in_outer_by_reduction
732 && relevant
!= vect_used_in_outer
)
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
736 "unsupported use of nested cycle.\n");
742 case vect_double_reduction_def
:
743 if (relevant
!= vect_unused_in_scope
744 && relevant
!= vect_used_by_reduction
745 && relevant
!= vect_used_only_live
)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
749 "unsupported use of double reduction.\n");
759 if (is_pattern_stmt_p (stmt_vinfo
))
761 /* Pattern statements are not inserted into the code, so
762 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
763 have to scan the RHS or function arguments instead. */
764 if (is_gimple_assign (stmt
))
766 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
767 tree op
= gimple_assign_rhs1 (stmt
);
770 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
772 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
773 relevant
, &worklist
, false)
774 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
775 relevant
, &worklist
, false))
779 for (; i
< gimple_num_ops (stmt
); i
++)
781 op
= gimple_op (stmt
, i
);
782 if (TREE_CODE (op
) == SSA_NAME
783 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
788 else if (is_gimple_call (stmt
))
790 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
792 tree arg
= gimple_call_arg (stmt
, i
);
793 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
800 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
802 tree op
= USE_FROM_PTR (use_p
);
803 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
808 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
810 gather_scatter_info gs_info
;
811 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
813 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
817 } /* while worklist */
/* NOTE(review): sampled listing.  Visible flow: assert not pure-SLP, charge
   one scalar_to_vec prologue cost per constant/external operand (loop bound
   `ndts` is declared on a missing line -- presumably a parameter of this
   function; confirm upstream), charge ncopies vector_stmt in the loop body,
   and dump the totals.  Return type, braces, and the `ndts` parameter are
   not visible in this excerpt.  */
823 /* Function vect_model_simple_cost.
825 Models cost for simple operations, i.e. those that only emit ncopies of a
826 single op. Right now, this does not account for multiple insns that could
827 be generated for the single vector op. We will handle that shortly. */
830 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
831 enum vect_def_type
*dt
,
833 stmt_vector_for_cost
*prologue_cost_vec
,
834 stmt_vector_for_cost
*body_cost_vec
)
837 int inside_cost
= 0, prologue_cost
= 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 gcc_assert (!PURE_SLP_STMT (stmt_info
));
842 /* Cost the "broadcast" of a scalar operand in to a vector operand.
843 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
845 for (i
= 0; i
< ndts
; i
++)
846 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
847 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
848 stmt_info
, 0, vect_prologue
);
850 /* Pass the inside-of-loop statements to the target-specific cost model. */
851 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
852 stmt_info
, 0, vect_body
);
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_NOTE
, vect_location
,
856 "vect_model_simple_cost: inside_cost = %d, "
857 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): sampled listing.  Visible flow: pick the target cost data
   from the loop- or bb-vinfo (the `if (loop_vinfo) ... else ...` around the
   two assignments is among the missing lines -- both assignments cannot run
   unconditionally; confirm upstream); for each of PWR+1 steps charge
   vect_pow2(tmp) vec_promote_demote stmts (the expression computing `tmp`
   is truncated mid-ternary); then charge one vector_stmt prologue cost per
   constant/external operand (max 2 args assumed) and dump totals.  */
861 /* Model cost for type demotion and promotion operations. PWR is normally
862 zero for single-step promotions and demotions. It will be one if
863 two-step promotion/demotion is required, and so on. Each additional
864 step doubles the number of instructions required. */
867 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
868 enum vect_def_type
*dt
, int pwr
)
871 int inside_cost
= 0, prologue_cost
= 0;
872 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
873 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
874 void *target_cost_data
;
876 /* The SLP costs were already calculated during SLP tree build. */
877 gcc_assert (!PURE_SLP_STMT (stmt_info
));
880 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
882 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
884 for (i
= 0; i
< pwr
+ 1; i
++)
886 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
888 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
889 vec_promote_demote
, stmt_info
, 0,
893 /* FORNOW: Assuming maximum 2 args per stmts. */
894 for (i
= 0; i
< 2; i
++)
895 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
896 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
897 stmt_info
, 0, vect_prologue
);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE
, vect_location
,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): sampled listing.  Visible flow: charge a scalar_to_vec
   prologue broadcast for invariant stores; for non-SLP grouped accesses
   retarget first_stmt/dr to the group head; charge permute costs for
   VMAT_CONTIGUOUS_PERMUTE (the `if (first_stmt_p && ...)` guard head,
   original line 943, is truncated -- only the `&&` continuation is
   visible); charge either N scalar stores (elementwise/gather-scatter) or
   vect_get_store_cost; plus vec_to_scalar extraction for elementwise/
   strided-SLP; dump totals.  Braces and some dump arguments are missing.  */
905 /* Function vect_model_store_cost
907 Models cost for stores. In the case of grouped accesses, one access
908 has the overhead of the grouped access attributed to it. */
911 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
912 vect_memory_access_type memory_access_type
,
913 vec_load_store_type vls_type
, slp_tree slp_node
,
914 stmt_vector_for_cost
*prologue_cost_vec
,
915 stmt_vector_for_cost
*body_cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
919 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
920 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
922 if (vls_type
== VLS_STORE_INVARIANT
)
923 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
924 stmt_info
, 0, vect_prologue
);
926 /* Grouped stores update all elements in the group at once,
927 so we want the DR for the first statement. */
928 if (!slp_node
&& grouped_access_p
)
930 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
931 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
934 /* True if we should include any once-per-group costs as well as
935 the cost of the statement itself. For SLP we only get called
936 once per group anyhow. */
937 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
939 /* We assume that the cost of a single store-lanes instruction is
940 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
941 access is instead being provided by a permute-and-store operation,
942 include the cost of the permutes. */
944 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
946 /* Uses a high and low interleave or shuffle operations for each
948 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
949 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
950 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
951 stmt_info
, 0, vect_body
);
953 if (dump_enabled_p ())
954 dump_printf_loc (MSG_NOTE
, vect_location
,
955 "vect_model_store_cost: strided group_size = %d .\n",
959 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
960 /* Costs of the stores. */
961 if (memory_access_type
== VMAT_ELEMENTWISE
962 || memory_access_type
== VMAT_GATHER_SCATTER
)
964 /* N scalar stores plus extracting the elements. */
965 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
966 inside_cost
+= record_stmt_cost (body_cost_vec
,
967 ncopies
* assumed_nunits
,
968 scalar_store
, stmt_info
, 0, vect_body
);
971 vect_get_store_cost (dr
, ncopies
, &inside_cost
, body_cost_vec
);
973 if (memory_access_type
== VMAT_ELEMENTWISE
974 || memory_access_type
== VMAT_STRIDED_SLP
)
976 /* N scalar stores plus extracting the elements. */
977 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
978 inside_cost
+= record_stmt_cost (body_cost_vec
,
979 ncopies
* assumed_nunits
,
980 vec_to_scalar
, stmt_info
, 0, vect_body
);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE
, vect_location
,
985 "vect_model_store_cost: inside_cost = %d, "
986 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): sampled listing.  Switch over the DR's alignment support:
   dr_aligned charges ncopies vector_store (the `case dr_aligned:` label and
   `break`s are among the missing lines); dr_unaligned_supported charges
   ncopies unaligned_store carrying the misalignment; dr_unaligned_unsupported
   pins *inside_cost to VECT_MAX_COST.  The default/gcc_unreachable arm and
   closing braces are not visible.  */
990 /* Calculate cost of DR's memory access. */
992 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
993 unsigned int *inside_cost
,
994 stmt_vector_for_cost
*body_cost_vec
)
996 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
997 gimple
*stmt
= DR_STMT (dr
);
998 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1000 switch (alignment_support_scheme
)
1004 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1005 vector_store
, stmt_info
, 0,
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_NOTE
, vect_location
,
1010 "vect_model_store_cost: aligned.\n");
1014 case dr_unaligned_supported
:
1016 /* Here, we assign an additional cost for the unaligned store. */
1017 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1018 unaligned_store
, stmt_info
,
1019 DR_MISALIGNMENT (dr
), vect_body
);
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_NOTE
, vect_location
,
1022 "vect_model_store_cost: unaligned supported by "
1027 case dr_unaligned_unsupported
:
1029 *inside_cost
= VECT_MAX_COST
;
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1033 "vect_model_store_cost: unsupported access.\n");
/* NOTE(review): sampled listing.  Mirror of vect_model_store_cost for
   loads: retarget to the group head for non-SLP grouped accesses; charge
   permutes for VMAT_CONTIGUOUS_PERMUTE (the guard head with first_stmt_p,
   original line 1079, is truncated -- only the `&&` continuation is
   visible); charge N scalar loads for elementwise/gather-scatter or defer
   to vect_get_load_cost; add a vec_construct gather step for elementwise/
   strided-SLP; dump totals.  The `slp_node` parameter referenced at
   original line 1064 is declared on a missing signature line (1053);
   braces are missing throughout.  */
1043 /* Function vect_model_load_cost
1045 Models cost for loads. In the case of grouped accesses, one access has
1046 the overhead of the grouped access attributed to it. Since unaligned
1047 accesses are supported for loads, we also account for the costs of the
1048 access scheme chosen. */
1051 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1052 vect_memory_access_type memory_access_type
,
1054 stmt_vector_for_cost
*prologue_cost_vec
,
1055 stmt_vector_for_cost
*body_cost_vec
)
1057 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1058 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1059 unsigned int inside_cost
= 0, prologue_cost
= 0;
1060 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1062 /* Grouped loads read all elements in the group at once,
1063 so we want the DR for the first statement. */
1064 if (!slp_node
&& grouped_access_p
)
1066 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1067 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1070 /* True if we should include any once-per-group costs as well as
1071 the cost of the statement itself. For SLP we only get called
1072 once per group anyhow. */
1073 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1075 /* We assume that the cost of a single load-lanes instruction is
1076 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1077 access is instead being provided by a load-and-permute operation,
1078 include the cost of the permutes. */
1080 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1082 /* Uses an even and odd extract operations or shuffle operations
1083 for each needed permute. */
1084 int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1085 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1086 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1087 stmt_info
, 0, vect_body
);
1089 if (dump_enabled_p ())
1090 dump_printf_loc (MSG_NOTE
, vect_location
,
1091 "vect_model_load_cost: strided group_size = %d .\n",
1095 /* The loads themselves. */
1096 if (memory_access_type
== VMAT_ELEMENTWISE
1097 || memory_access_type
== VMAT_GATHER_SCATTER
)
1099 /* N scalar loads plus gathering them into a vector. */
1100 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1101 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1102 inside_cost
+= record_stmt_cost (body_cost_vec
,
1103 ncopies
* assumed_nunits
,
1104 scalar_load
, stmt_info
, 0, vect_body
);
1107 vect_get_load_cost (dr
, ncopies
, first_stmt_p
,
1108 &inside_cost
, &prologue_cost
,
1109 prologue_cost_vec
, body_cost_vec
, true);
1110 if (memory_access_type
== VMAT_ELEMENTWISE
1111 || memory_access_type
== VMAT_STRIDED_SLP
)
1112 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1113 stmt_info
, 0, vect_body
);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE
, vect_location
,
1117 "vect_model_load_cost: inside_cost = %d, "
1118 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1122 /* Calculate cost of DR's memory access. */
1124 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1125 bool add_realign_cost
, unsigned int *inside_cost
,
1126 unsigned int *prologue_cost
,
1127 stmt_vector_for_cost
*prologue_cost_vec
,
1128 stmt_vector_for_cost
*body_cost_vec
,
1129 bool record_prologue_costs
)
1131 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1132 gimple
*stmt
= DR_STMT (dr
);
1133 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1135 switch (alignment_support_scheme
)
1139 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1140 stmt_info
, 0, vect_body
);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE
, vect_location
,
1144 "vect_model_load_cost: aligned.\n");
1148 case dr_unaligned_supported
:
1150 /* Here, we assign an additional cost for the unaligned load. */
1151 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1152 unaligned_load
, stmt_info
,
1153 DR_MISALIGNMENT (dr
), vect_body
);
1155 if (dump_enabled_p ())
1156 dump_printf_loc (MSG_NOTE
, vect_location
,
1157 "vect_model_load_cost: unaligned supported by "
1162 case dr_explicit_realign
:
1164 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1165 vector_load
, stmt_info
, 0, vect_body
);
1166 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1167 vec_perm
, stmt_info
, 0, vect_body
);
1169 /* FIXME: If the misalignment remains fixed across the iterations of
1170 the containing loop, the following cost should be added to the
1172 if (targetm
.vectorize
.builtin_mask_for_load
)
1173 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1174 stmt_info
, 0, vect_body
);
1176 if (dump_enabled_p ())
1177 dump_printf_loc (MSG_NOTE
, vect_location
,
1178 "vect_model_load_cost: explicit realign\n");
1182 case dr_explicit_realign_optimized
:
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE
, vect_location
,
1186 "vect_model_load_cost: unaligned software "
1189 /* Unaligned software pipeline has a load of an address, an initial
1190 load, and possibly a mask operation to "prime" the loop. However,
1191 if this is an access in a group of loads, which provide grouped
1192 access, then the above cost should only be considered for one
1193 access in the group. Inside the loop, there is a load op
1194 and a realignment op. */
1196 if (add_realign_cost
&& record_prologue_costs
)
1198 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1199 vector_stmt
, stmt_info
,
1201 if (targetm
.vectorize
.builtin_mask_for_load
)
1202 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1203 vector_stmt
, stmt_info
,
1207 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1208 stmt_info
, 0, vect_body
);
1209 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1210 stmt_info
, 0, vect_body
);
1212 if (dump_enabled_p ())
1213 dump_printf_loc (MSG_NOTE
, vect_location
,
1214 "vect_model_load_cost: explicit realign optimized"
1220 case dr_unaligned_unsupported
:
1222 *inside_cost
= VECT_MAX_COST
;
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1226 "vect_model_load_cost: unsupported access.\n");
1235 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1236 the loop preheader for the vectorized stmt STMT. */
1239 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1242 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1245 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1246 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1250 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1254 if (nested_in_vect_loop_p (loop
, stmt
))
1257 pe
= loop_preheader_edge (loop
);
1258 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1259 gcc_assert (!new_bb
);
1263 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1265 gimple_stmt_iterator gsi_bb_start
;
1267 gcc_assert (bb_vinfo
);
1268 bb
= BB_VINFO_BB (bb_vinfo
);
1269 gsi_bb_start
= gsi_after_labels (bb
);
1270 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1274 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE
, vect_location
,
1277 "created new init_stmt: ");
1278 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1282 /* Function vect_init_vector.
1284 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1285 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1286 vector type a vector with all elements equal to VAL is created first.
1287 Place the initialization at BSI if it is not NULL. Otherwise, place the
1288 initialization at the loop preheader.
1289 Return the DEF of INIT_STMT.
1290 It will be used in the vectorization of STMT. */
1293 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1298 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1299 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1301 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1302 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1304 /* Scalar boolean value should be transformed into
1305 all zeros or all ones value before building a vector. */
1306 if (VECTOR_BOOLEAN_TYPE_P (type
))
1308 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1309 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1311 if (CONSTANT_CLASS_P (val
))
1312 val
= integer_zerop (val
) ? false_val
: true_val
;
1315 new_temp
= make_ssa_name (TREE_TYPE (type
));
1316 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1317 val
, true_val
, false_val
);
1318 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1322 else if (CONSTANT_CLASS_P (val
))
1323 val
= fold_convert (TREE_TYPE (type
), val
);
1326 new_temp
= make_ssa_name (TREE_TYPE (type
));
1327 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1328 init_stmt
= gimple_build_assign (new_temp
,
1329 fold_build1 (VIEW_CONVERT_EXPR
,
1333 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1334 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1338 val
= build_vector_from_val (type
, val
);
1341 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1342 init_stmt
= gimple_build_assign (new_temp
, val
);
1343 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1347 /* Function vect_get_vec_def_for_operand_1.
1349 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1350 DT that will be used in the vectorized stmt. */
1353 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1357 stmt_vec_info def_stmt_info
= NULL
;
1361 /* operand is a constant or a loop invariant. */
1362 case vect_constant_def
:
1363 case vect_external_def
:
1364 /* Code should use vect_get_vec_def_for_operand. */
1367 /* operand is defined inside the loop. */
1368 case vect_internal_def
:
1370 /* Get the def from the vectorized stmt. */
1371 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1373 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1374 /* Get vectorized pattern statement. */
1376 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1377 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1378 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1379 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1380 gcc_assert (vec_stmt
);
1381 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1382 vec_oprnd
= PHI_RESULT (vec_stmt
);
1383 else if (is_gimple_call (vec_stmt
))
1384 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1386 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1390 /* operand is defined by a loop header phi. */
1391 case vect_reduction_def
:
1392 case vect_double_reduction_def
:
1393 case vect_nested_cycle
:
1394 case vect_induction_def
:
1396 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1398 /* Get the def from the vectorized stmt. */
1399 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1400 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1401 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1402 vec_oprnd
= PHI_RESULT (vec_stmt
);
1404 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1414 /* Function vect_get_vec_def_for_operand.
1416 OP is an operand in STMT. This function returns a (vector) def that will be
1417 used in the vectorized stmt for STMT.
1419 In the case that OP is an SSA_NAME which is defined in the loop, then
1420 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1422 In case OP is an invariant or constant, a new stmt that creates a vector def
1423 needs to be introduced. VECTYPE may be used to specify a required type for
1424 vector invariant. */
1427 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1430 enum vect_def_type dt
;
1432 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1433 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1435 if (dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE
, vect_location
,
1438 "vect_get_vec_def_for_operand: ");
1439 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1440 dump_printf (MSG_NOTE
, "\n");
1443 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1444 gcc_assert (is_simple_use
);
1445 if (def_stmt
&& dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1448 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1451 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1453 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1457 vector_type
= vectype
;
1458 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1459 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1460 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1462 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1464 gcc_assert (vector_type
);
1465 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1468 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1472 /* Function vect_get_vec_def_for_stmt_copy
1474 Return a vector-def for an operand. This function is used when the
1475 vectorized stmt to be created (by the caller to this function) is a "copy"
1476 created in case the vectorized result cannot fit in one vector, and several
1477 copies of the vector-stmt are required. In this case the vector-def is
1478 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1479 of the stmt that defines VEC_OPRND.
1480 DT is the type of the vector def VEC_OPRND.
1483 In case the vectorization factor (VF) is bigger than the number
1484 of elements that can fit in a vectype (nunits), we have to generate
1485 more than one vector stmt to vectorize the scalar stmt. This situation
1486 arises when there are multiple data-types operated upon in the loop; the
1487 smallest data-type determines the VF, and as a result, when vectorizing
1488 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1489 vector stmt (each computing a vector of 'nunits' results, and together
1490 computing 'VF' results in each iteration). This function is called when
1491 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1492 which VF=16 and nunits=4, so the number of copies required is 4):
1494 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1496 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1497 VS1.1: vx.1 = memref1 VS1.2
1498 VS1.2: vx.2 = memref2 VS1.3
1499 VS1.3: vx.3 = memref3
1501 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1502 VSnew.1: vz1 = vx.1 + ... VSnew.2
1503 VSnew.2: vz2 = vx.2 + ... VSnew.3
1504 VSnew.3: vz3 = vx.3 + ...
1506 The vectorization of S1 is explained in vectorizable_load.
1507 The vectorization of S2:
1508 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1509 the function 'vect_get_vec_def_for_operand' is called to
1510 get the relevant vector-def for each operand of S2. For operand x it
1511 returns the vector-def 'vx.0'.
1513 To create the remaining copies of the vector-stmt (VSnew.j), this
1514 function is called to get the relevant vector-def for each operand. It is
1515 obtained from the respective VS1.j stmt, which is recorded in the
1516 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1518 For example, to obtain the vector-def 'vx.1' in order to create the
1519 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1520 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1521 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1522 and return its def ('vx.1').
1523 Overall, to create the above sequence this function will be called 3 times:
1524 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1525 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1526 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1529 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1531 gimple
*vec_stmt_for_operand
;
1532 stmt_vec_info def_stmt_info
;
1534 /* Do nothing; can reuse same def. */
1535 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1538 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1539 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1540 gcc_assert (def_stmt_info
);
1541 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1542 gcc_assert (vec_stmt_for_operand
);
1543 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1544 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1546 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1551 /* Get vectorized definitions for the operands to create a copy of an original
1552 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1555 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1556 vec
<tree
> *vec_oprnds0
,
1557 vec
<tree
> *vec_oprnds1
)
1559 tree vec_oprnd
= vec_oprnds0
->pop ();
1561 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1562 vec_oprnds0
->quick_push (vec_oprnd
);
1564 if (vec_oprnds1
&& vec_oprnds1
->length ())
1566 vec_oprnd
= vec_oprnds1
->pop ();
1567 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1568 vec_oprnds1
->quick_push (vec_oprnd
);
1573 /* Get vectorized definitions for OP0 and OP1. */
1576 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1577 vec
<tree
> *vec_oprnds0
,
1578 vec
<tree
> *vec_oprnds1
,
1583 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1584 auto_vec
<tree
> ops (nops
);
1585 auto_vec
<vec
<tree
> > vec_defs (nops
);
1587 ops
.quick_push (op0
);
1589 ops
.quick_push (op1
);
1591 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1593 *vec_oprnds0
= vec_defs
[0];
1595 *vec_oprnds1
= vec_defs
[1];
1601 vec_oprnds0
->create (1);
1602 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1603 vec_oprnds0
->quick_push (vec_oprnd
);
1607 vec_oprnds1
->create (1);
1608 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1609 vec_oprnds1
->quick_push (vec_oprnd
);
1614 /* Helper function called by vect_finish_replace_stmt and
1615 vect_finish_stmt_generation. Set the location of the new
1616 statement and create a stmt_vec_info for it. */
1619 vect_finish_stmt_generation_1 (gimple
*stmt
, gimple
*vec_stmt
)
1621 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1622 vec_info
*vinfo
= stmt_info
->vinfo
;
1624 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1626 if (dump_enabled_p ())
1628 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1629 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1632 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1634 /* While EH edges will generally prevent vectorization, stmt might
1635 e.g. be in a must-not-throw region. Ensure newly created stmts
1636 that could throw are part of the same region. */
1637 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1638 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1639 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1642 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1643 which sets the same scalar result as STMT did. */
1646 vect_finish_replace_stmt (gimple
*stmt
, gimple
*vec_stmt
)
1648 gcc_assert (gimple_get_lhs (stmt
) == gimple_get_lhs (vec_stmt
));
1650 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1651 gsi_replace (&gsi
, vec_stmt
, false);
1653 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1656 /* Function vect_finish_stmt_generation.
1658 Insert a new stmt. */
1661 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1662 gimple_stmt_iterator
*gsi
)
1664 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1666 if (!gsi_end_p (*gsi
)
1667 && gimple_has_mem_ops (vec_stmt
))
1669 gimple
*at_stmt
= gsi_stmt (*gsi
);
1670 tree vuse
= gimple_vuse (at_stmt
);
1671 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1673 tree vdef
= gimple_vdef (at_stmt
);
1674 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1675 /* If we have an SSA vuse and insert a store, update virtual
1676 SSA form to avoid triggering the renamer. Do so only
1677 if we can easily see all uses - which is what almost always
1678 happens with the way vectorized stmts are inserted. */
1679 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1680 && ((is_gimple_assign (vec_stmt
)
1681 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1682 || (is_gimple_call (vec_stmt
)
1683 && !(gimple_call_flags (vec_stmt
)
1684 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1686 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1687 gimple_set_vdef (vec_stmt
, new_vdef
);
1688 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1692 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1693 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1696 /* We want to vectorize a call to combined function CFN with function
1697 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1698 as the types of all inputs. Check whether this is possible using
1699 an internal function, returning its code if so or IFN_LAST if not. */
1702 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1703 tree vectype_out
, tree vectype_in
)
1706 if (internal_fn_p (cfn
))
1707 ifn
= as_internal_fn (cfn
);
1709 ifn
= associated_internal_fn (fndecl
);
1710 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1712 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1713 if (info
.vectorizable
)
1715 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1716 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1717 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1718 OPTIMIZE_FOR_SPEED
))
1726 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1727 gimple_stmt_iterator
*);
1729 /* Check whether a load or store statement in the loop described by
1730 LOOP_VINFO is possible in a fully-masked loop. This is testing
1731 whether the vectorizer pass has the appropriate support, as well as
1732 whether the target does.
1734 VLS_TYPE says whether the statement is a load or store and VECTYPE
1735 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1736 says how the load or store is going to be implemented and GROUP_SIZE
1737 is the number of load or store statements in the containing group.
1738 If the access is a gather load or scatter store, GS_INFO describes
1741 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1742 supported, otherwise record the required mask types. */
1745 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1746 vec_load_store_type vls_type
, int group_size
,
1747 vect_memory_access_type memory_access_type
,
1748 gather_scatter_info
*gs_info
)
1750 /* Invariant loads need no special support. */
1751 if (memory_access_type
== VMAT_INVARIANT
)
1754 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1755 machine_mode vecmode
= TYPE_MODE (vectype
);
1756 bool is_load
= (vls_type
== VLS_LOAD
);
1757 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1760 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1761 : !vect_store_lanes_supported (vectype
, group_size
, true))
1763 if (dump_enabled_p ())
1764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1765 "can't use a fully-masked loop because the"
1766 " target doesn't have an appropriate masked"
1767 " load/store-lanes instruction.\n");
1768 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1771 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1772 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1776 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1778 internal_fn ifn
= (is_load
1779 ? IFN_MASK_GATHER_LOAD
1780 : IFN_MASK_SCATTER_STORE
);
1781 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1782 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1783 gs_info
->memory_type
,
1784 TYPE_SIGN (offset_type
),
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1789 "can't use a fully-masked loop because the"
1790 " target doesn't have an appropriate masked"
1791 " gather load or scatter store instruction.\n");
1792 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1795 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1796 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1800 if (memory_access_type
!= VMAT_CONTIGUOUS
1801 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1803 /* Element X of the data must come from iteration i * VF + X of the
1804 scalar loop. We need more work to support other mappings. */
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1807 "can't use a fully-masked loop because an access"
1808 " isn't contiguous.\n");
1809 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1813 machine_mode mask_mode
;
1814 if (!(targetm
.vectorize
.get_mask_mode
1815 (GET_MODE_NUNITS (vecmode
),
1816 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1817 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1821 "can't use a fully-masked loop because the target"
1822 " doesn't have the appropriate masked load or"
1824 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1827 /* We might load more scalars than we need for permuting SLP loads.
1828 We checked in get_group_load_store_type that the extra elements
1829 don't leak into a new vector. */
1830 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1831 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1832 unsigned int nvectors
;
1833 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1834 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1844 MASK_TYPE is the type of both masks. If new statements are needed,
1845 insert them before GSI. */
1848 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1849 gimple_stmt_iterator
*gsi
)
1851 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1855 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1856 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1857 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1858 vec_mask
, loop_mask
);
1859 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1863 /* Determine whether we can use a gather load or scatter store to vectorize
1864 strided load or store STMT by truncating the current offset to a smaller
1865 width. We need to be able to construct an offset vector:
1867 { 0, X, X*2, X*3, ... }
1869 without loss of precision, where X is STMT's DR_STEP.
1871 Return true if this is possible, describing the gather load or scatter
1872 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1875 vect_truncate_gather_scatter_offset (gimple
*stmt
, loop_vec_info loop_vinfo
,
1877 gather_scatter_info
*gs_info
)
1879 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1880 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1881 tree step
= DR_STEP (dr
);
1882 if (TREE_CODE (step
) != INTEGER_CST
)
1884 /* ??? Perhaps we could use range information here? */
1885 if (dump_enabled_p ())
1886 dump_printf_loc (MSG_NOTE
, vect_location
,
1887 "cannot truncate variable step.\n");
1891 /* Get the number of bits in an element. */
1892 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1893 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1894 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1896 /* Set COUNT to the upper limit on the number of elements - 1.
1897 Start with the maximum vectorization factor. */
1898 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1900 /* Try lowering COUNT to the number of scalar latch iterations. */
1901 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1902 widest_int max_iters
;
1903 if (max_loop_iterations (loop
, &max_iters
)
1904 && max_iters
< count
)
1905 count
= max_iters
.to_shwi ();
1907 /* Try scales of 1 and the element size. */
1908 int scales
[] = { 1, vect_get_scalar_dr_size (dr
) };
1909 bool overflow_p
= false;
1910 for (int i
= 0; i
< 2; ++i
)
1912 int scale
= scales
[i
];
1914 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1917 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1918 in OFFSET_BITS bits. */
1919 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow_p
);
1922 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1923 if (wi::min_precision (range
, sign
) > element_bits
)
1929 /* See whether the target supports the operation. */
1930 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1931 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
1932 memory_type
, element_bits
, sign
, scale
,
1933 &gs_info
->ifn
, &gs_info
->element_type
))
1936 tree offset_type
= build_nonstandard_integer_type (element_bits
,
1939 gs_info
->decl
= NULL_TREE
;
1940 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1941 but we don't need to store that here. */
1942 gs_info
->base
= NULL_TREE
;
1943 gs_info
->offset
= fold_convert (offset_type
, step
);
1944 gs_info
->offset_dt
= vect_constant_def
;
1945 gs_info
->offset_vectype
= NULL_TREE
;
1946 gs_info
->scale
= scale
;
1947 gs_info
->memory_type
= memory_type
;
1951 if (overflow_p
&& dump_enabled_p ())
1952 dump_printf_loc (MSG_NOTE
, vect_location
,
1953 "truncating gather/scatter offset to %d bits"
1954 " might change its value.\n", element_bits
);
1959 /* Return true if we can use gather/scatter internal functions to
1960 vectorize STMT, which is a grouped or strided load or store.
1961 MASKED_P is true if load or store is conditional. When returning
1962 true, fill in GS_INFO with the information required to perform the
1966 vect_use_strided_gather_scatters_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
1968 gather_scatter_info
*gs_info
)
1970 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
)
1972 return vect_truncate_gather_scatter_offset (stmt
, loop_vinfo
,
1975 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
1976 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1977 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1978 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
1980 /* Enforced by vect_check_gather_scatter. */
1981 gcc_assert (element_bits
>= offset_bits
);
1983 /* If the elements are wider than the offset, convert the offset to the
1984 same width, without changing its sign. */
1985 if (element_bits
> offset_bits
)
1987 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
1988 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
1989 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE
, vect_location
,
1994 "using gather/scatter for strided/grouped access,"
1995 " scale = %d\n", gs_info
->scale
);
2000 /* STMT is a non-strided load or store, meaning that it accesses
2001 elements with a known constant step. Return -1 if that step
2002 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2005 compare_step_with_zero (gimple
*stmt
)
2007 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2008 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2009 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
2013 /* If the target supports a permute mask that reverses the elements in
2014 a vector of type VECTYPE, return that mask, otherwise return null. */
2017 perm_mask_for_reverse (tree vectype
)
2019 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2021 /* The encoding has a single stepped pattern. */
2022 vec_perm_builder
sel (nunits
, 1, 3);
2023 for (int i
= 0; i
< 3; ++i
)
2024 sel
.quick_push (nunits
- 1 - i
);
2026 vec_perm_indices
indices (sel
, 1, nunits
);
2027 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2029 return vect_gen_perm_mask_checked (vectype
, indices
);
2032 /* STMT is either a masked or unconditional store. Return the value
2036 vect_get_store_rhs (gimple
*stmt
)
2038 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt
))
2040 gcc_assert (gimple_assign_single_p (assign
));
2041 return gimple_assign_rhs1 (assign
);
2043 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
2045 internal_fn ifn
= gimple_call_internal_fn (call
);
2046 int index
= internal_fn_stored_value_index (ifn
);
2047 gcc_assert (index
>= 0);
2048 return gimple_call_arg (stmt
, index
);
2053 /* A subroutine of get_load_store_type, with a subset of the same
2054 arguments. Handle the case where STMT is part of a grouped load
2057 For stores, the statements in the group are all consecutive
2058 and there is no gap at the end. For loads, the statements in the
2059 group might not be consecutive; there can be gaps between statements
2060 as well as at the end. */
2063 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
2064 bool masked_p
, vec_load_store_type vls_type
,
2065 vect_memory_access_type
*memory_access_type
,
2066 gather_scatter_info
*gs_info
)
2068 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2069 vec_info
*vinfo
= stmt_info
->vinfo
;
2070 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2071 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2072 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
2073 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
2074 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
2075 bool single_element_p
= (stmt
== first_stmt
2076 && !GROUP_NEXT_ELEMENT (stmt_info
));
2077 unsigned HOST_WIDE_INT gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
2078 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2080 /* True if the vectorized statements would access beyond the last
2081 statement in the group. */
2082 bool overrun_p
= false;
2084 /* True if we can cope with such overrun by peeling for gaps, so that
2085 there is at least one final scalar iteration after the vector loop. */
2086 bool can_overrun_p
= (!masked_p
2087 && vls_type
== VLS_LOAD
2091 /* There can only be a gap at the end of the group if the stride is
2092 known at compile time. */
2093 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
2095 /* Stores can't yet have gaps. */
2096 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2100 if (STMT_VINFO_STRIDED_P (stmt_info
))
2102 /* Try to use consecutive accesses of GROUP_SIZE elements,
2103 separated by the stride, until we have a complete vector.
2104 Fall back to scalar accesses if that isn't possible. */
2105 if (multiple_p (nunits
, group_size
))
2106 *memory_access_type
= VMAT_STRIDED_SLP
;
2108 *memory_access_type
= VMAT_ELEMENTWISE
;
2112 overrun_p
= loop_vinfo
&& gap
!= 0;
2113 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2116 "Grouped store with gaps requires"
2117 " non-consecutive accesses\n");
2120 /* An overrun is fine if the trailing elements are smaller
2121 than the alignment boundary B. Every vector access will
2122 be a multiple of B and so we are guaranteed to access a
2123 non-gap element in the same B-sized block. */
2125 && gap
< (vect_known_alignment_in_bytes (first_dr
)
2126 / vect_get_scalar_dr_size (first_dr
)))
2128 if (overrun_p
&& !can_overrun_p
)
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2132 "Peeling for outer loop is not supported\n");
2135 *memory_access_type
= VMAT_CONTIGUOUS
;
2140 /* We can always handle this case using elementwise accesses,
2141 but see if something more efficient is available. */
2142 *memory_access_type
= VMAT_ELEMENTWISE
;
2144 /* If there is a gap at the end of the group then these optimizations
2145 would access excess elements in the last iteration. */
2146 bool would_overrun_p
= (gap
!= 0);
2147 /* An overrun is fine if the trailing elements are smaller than the
2148 alignment boundary B. Every vector access will be a multiple of B
2149 and so we are guaranteed to access a non-gap element in the
2150 same B-sized block. */
2153 && gap
< (vect_known_alignment_in_bytes (first_dr
)
2154 / vect_get_scalar_dr_size (first_dr
)))
2155 would_overrun_p
= false;
2157 if (!STMT_VINFO_STRIDED_P (stmt_info
)
2158 && (can_overrun_p
|| !would_overrun_p
)
2159 && compare_step_with_zero (stmt
) > 0)
2161 /* First cope with the degenerate case of a single-element
2163 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2164 *memory_access_type
= VMAT_CONTIGUOUS
;
2166 /* Otherwise try using LOAD/STORE_LANES. */
2167 if (*memory_access_type
== VMAT_ELEMENTWISE
2168 && (vls_type
== VLS_LOAD
2169 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2170 : vect_store_lanes_supported (vectype
, group_size
,
2173 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2174 overrun_p
= would_overrun_p
;
2177 /* If that fails, try using permuting loads. */
2178 if (*memory_access_type
== VMAT_ELEMENTWISE
2179 && (vls_type
== VLS_LOAD
2180 ? vect_grouped_load_supported (vectype
, single_element_p
,
2182 : vect_grouped_store_supported (vectype
, group_size
)))
2184 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2185 overrun_p
= would_overrun_p
;
2189 /* As a last resort, trying using a gather load or scatter store.
2191 ??? Although the code can handle all group sizes correctly,
2192 it probably isn't a win to use separate strided accesses based
2193 on nearby locations. Or, even if it's a win over scalar code,
2194 it might not be a win over vectorizing at a lower VF, if that
2195 allows us to use contiguous accesses. */
2196 if (*memory_access_type
== VMAT_ELEMENTWISE
2199 && vect_use_strided_gather_scatters_p (stmt
, loop_vinfo
,
2201 *memory_access_type
= VMAT_GATHER_SCATTER
;
2204 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
2206 /* STMT is the leader of the group. Check the operands of all the
2207 stmts of the group. */
2208 gimple
*next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
2211 tree op
= vect_get_store_rhs (next_stmt
);
2213 enum vect_def_type dt
;
2214 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2218 "use not simple.\n");
2221 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
2227 gcc_assert (can_overrun_p
);
2228 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2230 "Data access with gaps requires scalar "
2232 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2238 /* A subroutine of get_load_store_type, with a subset of the same
2239 arguments. Handle the case where STMT is a load or store that
2240 accesses consecutive elements with a negative step. */
2242 static vect_memory_access_type
2243 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
2244 vec_load_store_type vls_type
,
2245 unsigned int ncopies
)
2247 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2248 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2249 dr_alignment_support alignment_support_scheme
;
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2255 "multiple types with negative step.\n");
2256 return VMAT_ELEMENTWISE
;
2259 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
2260 if (alignment_support_scheme
!= dr_aligned
2261 && alignment_support_scheme
!= dr_unaligned_supported
)
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2265 "negative step but alignment required.\n");
2266 return VMAT_ELEMENTWISE
;
2269 if (vls_type
== VLS_STORE_INVARIANT
)
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_NOTE
, vect_location
,
2273 "negative step with invariant source;"
2274 " no permute needed.\n");
2275 return VMAT_CONTIGUOUS_DOWN
;
2278 if (!perm_mask_for_reverse (vectype
))
2280 if (dump_enabled_p ())
2281 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2282 "negative step and reversing not supported.\n");
2283 return VMAT_ELEMENTWISE
;
2286 return VMAT_CONTIGUOUS_REVERSE
;
2289 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2290 if there is a memory access type that the vectorized form can use,
2291 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2292 or scatters, fill in GS_INFO accordingly.
2294 SLP says whether we're performing SLP rather than loop vectorization.
2295 MASKED_P is true if the statement is conditional on a vectorized mask.
2296 VECTYPE is the vector type that the vectorized statements will use.
2297 NCOPIES is the number of vector statements that will be needed. */
2300 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
, bool masked_p
,
2301 vec_load_store_type vls_type
, unsigned int ncopies
,
2302 vect_memory_access_type
*memory_access_type
,
2303 gather_scatter_info
*gs_info
)
2305 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2306 vec_info
*vinfo
= stmt_info
->vinfo
;
2307 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2308 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2309 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2311 *memory_access_type
= VMAT_GATHER_SCATTER
;
2313 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
2315 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
, &def_stmt
,
2316 &gs_info
->offset_dt
,
2317 &gs_info
->offset_vectype
))
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2321 "%s index use not simple.\n",
2322 vls_type
== VLS_LOAD
? "gather" : "scatter");
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2328 if (!get_group_load_store_type (stmt
, vectype
, slp
, masked_p
, vls_type
,
2329 memory_access_type
, gs_info
))
2332 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2336 && vect_use_strided_gather_scatters_p (stmt
, loop_vinfo
,
2338 *memory_access_type
= VMAT_GATHER_SCATTER
;
2340 *memory_access_type
= VMAT_ELEMENTWISE
;
2344 int cmp
= compare_step_with_zero (stmt
);
2346 *memory_access_type
= get_negative_load_store_type
2347 (stmt
, vectype
, vls_type
, ncopies
);
2350 gcc_assert (vls_type
== VLS_LOAD
);
2351 *memory_access_type
= VMAT_INVARIANT
;
2354 *memory_access_type
= VMAT_CONTIGUOUS
;
2357 if ((*memory_access_type
== VMAT_ELEMENTWISE
2358 || *memory_access_type
== VMAT_STRIDED_SLP
)
2359 && !nunits
.is_constant ())
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2363 "Not using elementwise accesses due to variable "
2364 "vectorization factor.\n");
2368 /* FIXME: At the moment the cost model seems to underestimate the
2369 cost of using elementwise accesses. This check preserves the
2370 traditional behavior until that can be fixed. */
2371 if (*memory_access_type
== VMAT_ELEMENTWISE
2372 && !STMT_VINFO_STRIDED_P (stmt_info
)
2373 && !(stmt
== GROUP_FIRST_ELEMENT (stmt_info
)
2374 && !GROUP_NEXT_ELEMENT (stmt_info
)
2375 && !pow2p_hwi (GROUP_SIZE (stmt_info
))))
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2379 "not falling back to elementwise accesses\n");
2385 /* Return true if boolean argument MASK is suitable for vectorizing
2386 conditional load or store STMT. When returning true, store the type
2387 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2388 in *MASK_VECTYPE_OUT. */
2391 vect_check_load_store_mask (gimple
*stmt
, tree mask
,
2392 vect_def_type
*mask_dt_out
,
2393 tree
*mask_vectype_out
)
2395 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2399 "mask argument is not a boolean.\n");
2403 if (TREE_CODE (mask
) != SSA_NAME
)
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2407 "mask argument is not an SSA name.\n");
2411 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2413 enum vect_def_type mask_dt
;
2415 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &def_stmt
, &mask_dt
,
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2420 "mask use not simple.\n");
2424 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2426 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2428 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2432 "could not find an appropriate vector mask type.\n");
2436 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2437 TYPE_VECTOR_SUBPARTS (vectype
)))
2439 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2442 "vector mask type ");
2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, mask_vectype
);
2444 dump_printf (MSG_MISSED_OPTIMIZATION
,
2445 " does not match vector data type ");
2446 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, vectype
);
2447 dump_printf (MSG_MISSED_OPTIMIZATION
, ".\n");
2452 *mask_dt_out
= mask_dt
;
2453 *mask_vectype_out
= mask_vectype
;
2457 /* Return true if stored value RHS is suitable for vectorizing store
2458 statement STMT. When returning true, store the type of the
2459 definition in *RHS_DT_OUT, the type of the vectorized store value in
2460 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2463 vect_check_store_rhs (gimple
*stmt
, tree rhs
, vect_def_type
*rhs_dt_out
,
2464 tree
*rhs_vectype_out
, vec_load_store_type
*vls_type_out
)
2466 /* In the case this is a store from a constant make sure
2467 native_encode_expr can handle it. */
2468 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2472 "cannot encode constant as a byte sequence.\n");
2476 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2478 enum vect_def_type rhs_dt
;
2480 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &def_stmt
, &rhs_dt
,
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2485 "use not simple.\n");
2489 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2490 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2494 "incompatible vector types.\n");
2498 *rhs_dt_out
= rhs_dt
;
2499 *rhs_vectype_out
= rhs_vectype
;
2500 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2501 *vls_type_out
= VLS_STORE_INVARIANT
;
2503 *vls_type_out
= VLS_STORE
;
2507 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2508 Note that we support masks with floating-point type, in which case the
2509 floats are interpreted as a bitmask. */
2512 vect_build_all_ones_mask (gimple
*stmt
, tree masktype
)
2514 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2515 return build_int_cst (masktype
, -1);
2516 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2518 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2519 mask
= build_vector_from_val (masktype
, mask
);
2520 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2522 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2526 for (int j
= 0; j
< 6; ++j
)
2528 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2529 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2530 mask
= build_vector_from_val (masktype
, mask
);
2531 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2536 /* Build an all-zero merge value of type VECTYPE while vectorizing
2537 STMT as a gather load. */
2540 vect_build_zero_merge_argument (gimple
*stmt
, tree vectype
)
2543 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2544 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2545 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2549 for (int j
= 0; j
< 6; ++j
)
2551 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2552 merge
= build_real (TREE_TYPE (vectype
), r
);
2556 merge
= build_vector_from_val (vectype
, merge
);
2557 return vect_init_vector (stmt
, merge
, vectype
, NULL
);
2560 /* Build a gather load call while vectorizing STMT. Insert new instructions
2561 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2562 operation. If the load is conditional, MASK is the unvectorized
2563 condition and MASK_DT is its definition type, otherwise MASK is null. */
2566 vect_build_gather_load_calls (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2567 gimple
**vec_stmt
, gather_scatter_info
*gs_info
,
2568 tree mask
, vect_def_type mask_dt
)
2570 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2571 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2572 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2573 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2574 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2575 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2576 edge pe
= loop_preheader_edge (loop
);
2577 enum { NARROW
, NONE
, WIDEN
} modifier
;
2578 poly_uint64 gather_off_nunits
2579 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2581 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2582 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2583 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2584 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2585 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2586 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2587 tree scaletype
= TREE_VALUE (arglist
);
2588 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2589 && (!mask
|| types_compatible_p (srctype
, masktype
)));
2591 tree perm_mask
= NULL_TREE
;
2592 tree mask_perm_mask
= NULL_TREE
;
2593 if (known_eq (nunits
, gather_off_nunits
))
2595 else if (known_eq (nunits
* 2, gather_off_nunits
))
2599 /* Currently widening gathers and scatters are only supported for
2600 fixed-length vectors. */
2601 int count
= gather_off_nunits
.to_constant ();
2602 vec_perm_builder
sel (count
, count
, 1);
2603 for (int i
= 0; i
< count
; ++i
)
2604 sel
.quick_push (i
| (count
/ 2));
2606 vec_perm_indices
indices (sel
, 1, count
);
2607 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2610 else if (known_eq (nunits
, gather_off_nunits
* 2))
2614 /* Currently narrowing gathers and scatters are only supported for
2615 fixed-length vectors. */
2616 int count
= nunits
.to_constant ();
2617 vec_perm_builder
sel (count
, count
, 1);
2618 sel
.quick_grow (count
);
2619 for (int i
= 0; i
< count
; ++i
)
2620 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2621 vec_perm_indices
indices (sel
, 2, count
);
2622 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2628 for (int i
= 0; i
< count
; ++i
)
2629 sel
[i
] = i
| (count
/ 2);
2630 indices
.new_vector (sel
, 2, count
);
2631 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2637 tree vec_dest
= vect_create_destination_var (gimple_get_lhs (stmt
),
2640 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2641 if (!is_gimple_min_invariant (ptr
))
2644 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2645 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2646 gcc_assert (!new_bb
);
2649 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2651 tree vec_oprnd0
= NULL_TREE
;
2652 tree vec_mask
= NULL_TREE
;
2653 tree src_op
= NULL_TREE
;
2654 tree mask_op
= NULL_TREE
;
2655 tree prev_res
= NULL_TREE
;
2656 stmt_vec_info prev_stmt_info
= NULL
;
2660 src_op
= vect_build_zero_merge_argument (stmt
, rettype
);
2661 mask_op
= vect_build_all_ones_mask (stmt
, masktype
);
2664 for (int j
= 0; j
< ncopies
; ++j
)
2668 if (modifier
== WIDEN
&& (j
& 1))
2669 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2670 perm_mask
, stmt
, gsi
);
2673 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt
);
2676 = vect_get_vec_def_for_stmt_copy (gs_info
->offset_dt
, vec_oprnd0
);
2678 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2680 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2681 TYPE_VECTOR_SUBPARTS (idxtype
)));
2682 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2683 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2684 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2685 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2691 if (mask_perm_mask
&& (j
& 1))
2692 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2693 mask_perm_mask
, stmt
, gsi
);
2697 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2699 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
2702 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2705 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
)),
2706 TYPE_VECTOR_SUBPARTS (masktype
)));
2707 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2708 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2709 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
2711 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2718 new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2721 if (!useless_type_conversion_p (vectype
, rettype
))
2723 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2724 TYPE_VECTOR_SUBPARTS (rettype
)));
2725 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2726 gimple_call_set_lhs (new_stmt
, op
);
2727 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2728 var
= make_ssa_name (vec_dest
);
2729 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2730 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2734 var
= make_ssa_name (vec_dest
, new_stmt
);
2735 gimple_call_set_lhs (new_stmt
, var
);
2738 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2740 if (modifier
== NARROW
)
2747 var
= permute_vec_elements (prev_res
, var
, perm_mask
, stmt
, gsi
);
2748 new_stmt
= SSA_NAME_DEF_STMT (var
);
2751 if (prev_stmt_info
== NULL
)
2752 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2754 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2755 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2759 /* Prepare the base and offset in GS_INFO for vectorization.
2760 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2761 to the vectorized offset argument for the first copy of STMT. STMT
2762 is the statement described by GS_INFO and LOOP is the containing loop. */
2765 vect_get_gather_scatter_ops (struct loop
*loop
, gimple
*stmt
,
2766 gather_scatter_info
*gs_info
,
2767 tree
*dataref_ptr
, tree
*vec_offset
)
2769 gimple_seq stmts
= NULL
;
2770 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2774 edge pe
= loop_preheader_edge (loop
);
2775 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2776 gcc_assert (!new_bb
);
2778 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2779 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2780 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt
,
2784 /* Prepare to implement a grouped or strided load or store using
2785 the gather load or scatter store operation described by GS_INFO.
2786 STMT is the load or store statement.
2788 Set *DATAREF_BUMP to the amount that should be added to the base
2789 address after each copy of the vectorized statement. Set *VEC_OFFSET
2790 to an invariant offset vector in which element I has the value
2791 I * DR_STEP / SCALE. */
2794 vect_get_strided_load_store_ops (gimple
*stmt
, loop_vec_info loop_vinfo
,
2795 gather_scatter_info
*gs_info
,
2796 tree
*dataref_bump
, tree
*vec_offset
)
2798 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2799 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2800 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2801 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2804 tree bump
= size_binop (MULT_EXPR
,
2805 fold_convert (sizetype
, DR_STEP (dr
)),
2806 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2807 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2809 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2811 /* The offset given in GS_INFO can have pointer type, so use the element
2812 type of the vector instead. */
2813 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2814 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2815 offset_type
= TREE_TYPE (offset_vectype
);
2817 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2818 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
2819 ssize_int (gs_info
->scale
));
2820 step
= fold_convert (offset_type
, step
);
2821 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2823 /* Create {0, X, X*2, X*3, ...}. */
2824 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
2825 build_zero_cst (offset_type
), step
);
2827 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2830 /* Return the amount that should be added to a vector pointer to move
2831 to the next or previous copy of AGGR_TYPE. DR is the data reference
2832 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2836 vect_get_data_ptr_increment (data_reference
*dr
, tree aggr_type
,
2837 vect_memory_access_type memory_access_type
)
2839 if (memory_access_type
== VMAT_INVARIANT
)
2840 return size_zero_node
;
2842 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2843 tree step
= vect_dr_behavior (dr
)->step
;
2844 if (tree_int_cst_sgn (step
) == -1)
2845 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2849 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2852 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2853 gimple
**vec_stmt
, slp_tree slp_node
,
2854 tree vectype_in
, enum vect_def_type
*dt
)
2857 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2858 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2860 unsigned HOST_WIDE_INT nunits
, num_bytes
;
2862 op
= gimple_call_arg (stmt
, 0);
2863 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2865 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
2868 /* Multiple types in SLP are handled by creating the appropriate number of
2869 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2874 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2876 gcc_assert (ncopies
>= 1);
2878 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2882 if (!TYPE_VECTOR_SUBPARTS (char_vectype
).is_constant (&num_bytes
))
2885 unsigned word_bytes
= num_bytes
/ nunits
;
2887 /* The encoding uses one stepped pattern for each byte in the word. */
2888 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2889 for (unsigned i
= 0; i
< 3; ++i
)
2890 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2891 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2893 vec_perm_indices
indices (elts
, 1, num_bytes
);
2894 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2899 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_bswap ==="
2905 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2906 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2907 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
2908 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2913 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2916 vec
<tree
> vec_oprnds
= vNULL
;
2917 gimple
*new_stmt
= NULL
;
2918 stmt_vec_info prev_stmt_info
= NULL
;
2919 for (unsigned j
= 0; j
< ncopies
; j
++)
2923 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
2925 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2927 /* Arguments are ready. create the new vector stmt. */
2930 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2932 tree tem
= make_ssa_name (char_vectype
);
2933 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2934 char_vectype
, vop
));
2935 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2936 tree tem2
= make_ssa_name (char_vectype
);
2937 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2938 tem
, tem
, bswap_vconst
);
2939 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2940 tem
= make_ssa_name (vectype
);
2941 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2943 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2945 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2952 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2954 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2956 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2959 vec_oprnds
.release ();
2963 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2964 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2965 in a single step. On success, store the binary pack code in
2969 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2970 tree_code
*convert_code
)
2972 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2973 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2977 int multi_step_cvt
= 0;
2978 auto_vec
<tree
, 8> interm_types
;
2979 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2980 &code
, &multi_step_cvt
,
2985 *convert_code
= code
;
2989 /* Function vectorizable_call.
2991 Check if GS performs a function call that can be vectorized.
2992 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2993 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2994 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2997 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
3004 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3005 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
3006 tree vectype_out
, vectype_in
;
3007 poly_uint64 nunits_in
;
3008 poly_uint64 nunits_out
;
3009 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3010 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3011 vec_info
*vinfo
= stmt_info
->vinfo
;
3012 tree fndecl
, new_temp
, rhs_type
;
3014 enum vect_def_type dt
[3]
3015 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3017 gimple
*new_stmt
= NULL
;
3019 vec
<tree
> vargs
= vNULL
;
3020 enum { NARROW
, NONE
, WIDEN
} modifier
;
3024 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3027 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3031 /* Is GS a vectorizable call? */
3032 stmt
= dyn_cast
<gcall
*> (gs
);
3036 if (gimple_call_internal_p (stmt
)
3037 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3038 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3039 /* Handled by vectorizable_load and vectorizable_store. */
3042 if (gimple_call_lhs (stmt
) == NULL_TREE
3043 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3046 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3048 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3050 /* Process function arguments. */
3051 rhs_type
= NULL_TREE
;
3052 vectype_in
= NULL_TREE
;
3053 nargs
= gimple_call_num_args (stmt
);
3055 /* Bail out if the function has more than three arguments, we do not have
3056 interesting builtin functions to vectorize with more than two arguments
3057 except for fma. No arguments is also not good. */
3058 if (nargs
== 0 || nargs
> 3)
3061 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3062 if (gimple_call_internal_p (stmt
)
3063 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
3066 rhs_type
= unsigned_type_node
;
3069 for (i
= 0; i
< nargs
; i
++)
3073 op
= gimple_call_arg (stmt
, i
);
3075 /* We can only handle calls with arguments of the same type. */
3077 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3079 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3081 "argument types differ.\n");
3085 rhs_type
= TREE_TYPE (op
);
3087 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
3089 if (dump_enabled_p ())
3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3091 "use not simple.\n");
3096 vectype_in
= opvectype
;
3098 && opvectype
!= vectype_in
)
3100 if (dump_enabled_p ())
3101 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3102 "argument vector types differ.\n");
3106 /* If all arguments are external or constant defs use a vector type with
3107 the same size as the output vector type. */
3109 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3111 gcc_assert (vectype_in
);
3114 if (dump_enabled_p ())
3116 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3117 "no vectype for scalar type ");
3118 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3119 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3126 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3127 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3128 if (known_eq (nunits_in
* 2, nunits_out
))
3130 else if (known_eq (nunits_out
, nunits_in
))
3132 else if (known_eq (nunits_out
* 2, nunits_in
))
3137 /* We only handle functions that do not read or clobber memory. */
3138 if (gimple_vuse (stmt
))
3140 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3142 "function reads from or writes to memory.\n");
3146 /* For now, we only vectorize functions if a target specific builtin
3147 is available. TODO -- in some cases, it might be profitable to
3148 insert the calls for pieces of the vector, in order to be able
3149 to vectorize other operations in the loop. */
3151 internal_fn ifn
= IFN_LAST
;
3152 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3153 tree callee
= gimple_call_fndecl (stmt
);
3155 /* First try using an internal function. */
3156 tree_code convert_code
= ERROR_MARK
;
3158 && (modifier
== NONE
3159 || (modifier
== NARROW
3160 && simple_integer_narrowing (vectype_out
, vectype_in
,
3162 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3165 /* If that fails, try asking for a target-specific built-in function. */
3166 if (ifn
== IFN_LAST
)
3168 if (cfn
!= CFN_LAST
)
3169 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3170 (cfn
, vectype_out
, vectype_in
);
3172 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3173 (callee
, vectype_out
, vectype_in
);
3176 if (ifn
== IFN_LAST
&& !fndecl
)
3178 if (cfn
== CFN_GOMP_SIMD_LANE
3181 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3182 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3183 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3184 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3186 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3187 { 0, 1, 2, ... vf - 1 } vector. */
3188 gcc_assert (nargs
== 0);
3190 else if (modifier
== NONE
3191 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3192 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3193 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3194 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
3198 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3200 "function is not vectorizable.\n");
3207 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3208 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3210 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3212 /* Sanity check: make sure that at least one copy of the vectorized stmt
3213 needs to be generated. */
3214 gcc_assert (ncopies
>= 1);
3216 if (!vec_stmt
) /* transformation not required. */
3218 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
3224 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
3225 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3226 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
3227 vec_promote_demote
, stmt_info
, 0, vect_body
);
3235 if (dump_enabled_p ())
3236 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3239 scalar_dest
= gimple_call_lhs (stmt
);
3240 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3242 prev_stmt_info
= NULL
;
3243 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3245 tree prev_res
= NULL_TREE
;
3246 for (j
= 0; j
< ncopies
; ++j
)
3248 /* Build argument list for the vectorized call. */
3250 vargs
.create (nargs
);
3256 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3257 vec
<tree
> vec_oprnds0
;
3259 for (i
= 0; i
< nargs
; i
++)
3260 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3261 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3262 vec_oprnds0
= vec_defs
[0];
3264 /* Arguments are ready. Create the new vector stmt. */
3265 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3268 for (k
= 0; k
< nargs
; k
++)
3270 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3271 vargs
[k
] = vec_oprndsk
[i
];
3273 if (modifier
== NARROW
)
3275 tree half_res
= make_ssa_name (vectype_in
);
3277 = gimple_build_call_internal_vec (ifn
, vargs
);
3278 gimple_call_set_lhs (call
, half_res
);
3279 gimple_call_set_nothrow (call
, true);
3281 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3284 prev_res
= half_res
;
3287 new_temp
= make_ssa_name (vec_dest
);
3288 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3289 prev_res
, half_res
);
3294 if (ifn
!= IFN_LAST
)
3295 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3297 call
= gimple_build_call_vec (fndecl
, vargs
);
3298 new_temp
= make_ssa_name (vec_dest
, call
);
3299 gimple_call_set_lhs (call
, new_temp
);
3300 gimple_call_set_nothrow (call
, true);
3303 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3304 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3307 for (i
= 0; i
< nargs
; i
++)
3309 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3310 vec_oprndsi
.release ();
3315 for (i
= 0; i
< nargs
; i
++)
3317 op
= gimple_call_arg (stmt
, i
);
3320 = vect_get_vec_def_for_operand (op
, stmt
);
3323 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
3325 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3328 vargs
.quick_push (vec_oprnd0
);
3331 if (gimple_call_internal_p (stmt
)
3332 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
3334 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3336 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3337 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3338 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
3339 new_temp
= make_ssa_name (vec_dest
);
3340 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3342 else if (modifier
== NARROW
)
3344 tree half_res
= make_ssa_name (vectype_in
);
3345 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3346 gimple_call_set_lhs (call
, half_res
);
3347 gimple_call_set_nothrow (call
, true);
3349 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3352 prev_res
= half_res
;
3355 new_temp
= make_ssa_name (vec_dest
);
3356 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3357 prev_res
, half_res
);
3362 if (ifn
!= IFN_LAST
)
3363 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3365 call
= gimple_build_call_vec (fndecl
, vargs
);
3366 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3367 gimple_call_set_lhs (call
, new_temp
);
3368 gimple_call_set_nothrow (call
, true);
3371 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3373 if (j
== (modifier
== NARROW
? 1 : 0))
3374 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3376 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3378 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3381 else if (modifier
== NARROW
)
3383 for (j
= 0; j
< ncopies
; ++j
)
3385 /* Build argument list for the vectorized call. */
3387 vargs
.create (nargs
* 2);
3393 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3394 vec
<tree
> vec_oprnds0
;
3396 for (i
= 0; i
< nargs
; i
++)
3397 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3398 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3399 vec_oprnds0
= vec_defs
[0];
3401 /* Arguments are ready. Create the new vector stmt. */
3402 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3406 for (k
= 0; k
< nargs
; k
++)
3408 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3409 vargs
.quick_push (vec_oprndsk
[i
]);
3410 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3413 if (ifn
!= IFN_LAST
)
3414 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3416 call
= gimple_build_call_vec (fndecl
, vargs
);
3417 new_temp
= make_ssa_name (vec_dest
, call
);
3418 gimple_call_set_lhs (call
, new_temp
);
3419 gimple_call_set_nothrow (call
, true);
3421 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3422 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3425 for (i
= 0; i
< nargs
; i
++)
3427 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3428 vec_oprndsi
.release ();
3433 for (i
= 0; i
< nargs
; i
++)
3435 op
= gimple_call_arg (stmt
, i
);
3439 = vect_get_vec_def_for_operand (op
, stmt
);
3441 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3445 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3447 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3449 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3452 vargs
.quick_push (vec_oprnd0
);
3453 vargs
.quick_push (vec_oprnd1
);
3456 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3457 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3458 gimple_call_set_lhs (new_stmt
, new_temp
);
3459 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3462 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3464 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3466 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3469 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3472 /* No current target implements this case. */
3477 /* The call in STMT might prevent it from being removed in dce.
3478 We however cannot remove it here, due to the way the ssa name
3479 it defines is mapped to the new definition. So just replace
3480 rhs of the statement with something harmless. */
3485 type
= TREE_TYPE (scalar_dest
);
3486 if (is_pattern_stmt_p (stmt_info
))
3487 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3489 lhs
= gimple_call_lhs (stmt
);
3491 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3492 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3493 set_vinfo_for_stmt (stmt
, NULL
);
3494 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3495 gsi_replace (gsi
, new_stmt
, false);
/* Per-argument bookkeeping used by vectorizable_simd_clone_call while
   matching a call's arguments against the available SIMD clones.
   NOTE(review): this extraction is missing several members that later
   code clearly references (e.g. .op, .vectype, .align) -- confirm
   against the complete source.  */
3501 struct simd_call_arg_info
/* Step of a linear argument; 0 when the argument is not linear.  */
3505 HOST_WIDE_INT linear_step
;
/* How the argument is defined (constant, external, internal, ...),
   as classified by vect_is_simple_use.  */
3506 enum vect_def_type dt
;
/* True if the argument is linear within the simd lane only, as
   detected by vect_simd_lane_linear.  */
3508 bool simd_lane_linear
;
3511 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3512 is linear within simd lane (but not within whole loop), note it in
/* NOTE(review): this extraction is missing lines (comment tail, early
   returns, braces); the comments below describe only the visible
   logic -- confirm against the complete source.  */
3516 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3517 struct simd_call_arg_info
*arginfo
)
/* Only a POINTER_PLUS of an invariant base is a candidate.  */
3519 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3521 if (!is_gimple_assign (def_stmt
)
3522 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3523 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3526 tree base
= gimple_assign_rhs1 (def_stmt
);
3527 HOST_WIDE_INT linear_step
= 0;
3528 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the SSA def chain of the offset, folding invariant additions
   into BASE and recording a single constant multiplicative step.  */
3529 while (TREE_CODE (v
) == SSA_NAME
)
3532 def_stmt
= SSA_NAME_DEF_STMT (v
);
3533 if (is_gimple_assign (def_stmt
))
3534 switch (gimple_assign_rhs_code (def_stmt
))
3537 t
= gimple_assign_rhs2 (def_stmt
);
3538 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3540 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3541 v
= gimple_assign_rhs1 (def_stmt
);
/* A multiplication supplies the step, but only one is allowed.  */
3544 t
= gimple_assign_rhs2 (def_stmt
);
3545 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3547 linear_step
= tree_to_shwi (t
);
3548 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are only looked through when not narrowing.  */
3551 t
= gimple_assign_rhs1 (def_stmt
);
3552 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3553 || (TYPE_PRECISION (TREE_TYPE (v
))
< TYPE_PRECISION (TREE_TYPE (t
))))
/* The chain must terminate at the IFN_GOMP_SIMD_LANE index for the
   value to be linear within the simd lane.  */
3563 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3565 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3566 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3571 arginfo
->linear_step
= linear_step
;
3573 arginfo
->simd_lane_linear
= true;
3579 /* Return the number of elements in vector type VECTYPE, which is associated
3580 with a SIMD clone. At present these vectors always have a constant
/* NOTE(review): the comment tail and the function braces are missing
   from this extraction.  The constant-length guarantee stated above is
   what makes the .to_constant () call below safe.  */
3583 static unsigned HOST_WIDE_INT
3584 simd_clone_subparts (tree vectype
)
3586 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3589 /* Function vectorizable_simd_clone_call.
3591 Check if STMT performs a function call that can be vectorized
3592 by calling a simd clone of the function.
3593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3594 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this extraction has dropped many original lines
   (returns, braces, declarations); the comments added below describe
   only the visible logic -- confirm against the complete source.  */
3598 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3599 gimple
**vec_stmt
, slp_tree slp_node
)
3604 tree vec_oprnd0
= NULL_TREE
;
3605 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3607 unsigned int nunits
;
3608 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3609 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3610 vec_info
*vinfo
= stmt_info
->vinfo
;
3611 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3612 tree fndecl
, new_temp
;
3614 gimple
*new_stmt
= NULL
;
3616 auto_vec
<simd_call_arg_info
> arginfo
;
3617 vec
<tree
> vargs
= vNULL
;
3619 tree lhs
, rtype
, ratype
;
3620 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3622 /* Is STMT a vectorizable call? */
3623 if (!is_gimple_call (stmt
))
3626 fndecl
= gimple_call_fndecl (stmt
)
;
3627 if (fndecl
== NULL_TREE
)
3630 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3631 if (node
== NULL
|| node
->simd_clones
== NULL
)
3634 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3637 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3641 if (gimple_call_lhs (stmt
)
3642 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3645 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3647 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3649 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3656 /* Process function arguments. */
3657 nargs
= gimple_call_num_args (stmt
);
3659 /* Bail out if the function has zero arguments. */
3663 arginfo
.reserve (nargs
, true);
3665 for (i
= 0; i
< nargs
; i
++)
3667 simd_call_arg_info thisarginfo
;
3670 thisarginfo
.linear_step
= 0;
3671 thisarginfo
.align
= 0;
3672 thisarginfo
.op
= NULL_TREE
;
3673 thisarginfo
.simd_lane_linear
= false;
3675 op
= gimple_call_arg (stmt
, i
);
3676 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
3677 &thisarginfo
.vectype
)
3678 || thisarginfo
.dt
== vect_uninitialized_def
)
3680 if (dump_enabled_p ())
3681 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3682 "use not simple.\n");
3686 if (thisarginfo
.dt
== vect_constant_def
3687 || thisarginfo
.dt
== vect_external_def
)
3688 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3690 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3692 /* For linear arguments, the analyze phase should have saved
3693 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3694 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3695 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3697 gcc_assert (vec_stmt
);
3698 thisarginfo
.linear_step
3699 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3701 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3702 thisarginfo
.simd_lane_linear
3703 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3704 == boolean_true_node
);
3705 /* If loop has been peeled for alignment, we need to adjust it. */
3706 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3707 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3708 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3710 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3711 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3712 tree opt
= TREE_TYPE (thisarginfo
.op
);
3713 bias
= fold_convert (TREE_TYPE (step
), bias
);
3714 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3716 = fold_build2 (POINTER_TYPE_P (opt
)
3717 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3718 thisarginfo
.op
, bias
);
3722 && thisarginfo
.dt
!= vect_constant_def
3723 && thisarginfo
.dt
!= vect_external_def
3725 && TREE_CODE (op
) == SSA_NAME
3726 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3728 && tree_fits_shwi_p (iv
.step
))
3730 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3731 thisarginfo
.op
= iv
.base
;
3733 else if ((thisarginfo
.dt
== vect_constant_def
3734 || thisarginfo
.dt
== vect_external_def
)
3735 && POINTER_TYPE_P (TREE_TYPE (op
)))
3736 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3737 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3739 if (POINTER_TYPE_P (TREE_TYPE (op
))
3740 && !thisarginfo
.linear_step
3742 && thisarginfo
.dt
!= vect_constant_def
3743 && thisarginfo
.dt
!= vect_external_def
3746 && TREE_CODE (op
) == SSA_NAME
)
3747 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3749 arginfo
.quick_push (thisarginfo
);
3752 unsigned HOST_WIDE_INT vf
;
3753 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3755 if (dump_enabled_p ())
3756 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3757 "not considering SIMD clones; not yet supported"
3758 " for variable-width vectors.\n");
/* Scan all simd clones of NODE and pick the one with the lowest
   accumulated "badness" (mismatch cost) for this call site.  */
3762 unsigned int badness
= 0;
3763 struct cgraph_node
*bestn
= NULL
;
3764 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3765 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3767 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3768 n
= n
->simdclone
->next_clone
)
3770 unsigned int this_badness
= 0;
3771 if (n
->simdclone
->simdlen
> vf
3772 || n
->simdclone
->nargs
!= nargs
)
3774 if (n
->simdclone
->simdlen
< vf
)
3775 this_badness
+= (exact_log2 (vf
)
3776 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3777 if (n
->simdclone
->inbranch
)
3778 this_badness
+= 2048;
3779 int target_badness
= targetm
.simd_clone
.usable (n
);
3780 if (target_badness
< 0)
3782 this_badness
+= target_badness
* 512;
3783 /* FORNOW: Have to add code to add the mask argument. */
3784 if (n
->simdclone
->inbranch
)
3786 for (i
= 0; i
< nargs
; i
++)
3788 switch (n
->simdclone
->args
[i
].arg_type
)
3790 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3791 if (!useless_type_conversion_p
3792 (n
->simdclone
->args
[i
].orig_type
,
3793 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3795 else if (arginfo
[i
].dt
== vect_constant_def
3796 || arginfo
[i
].dt
== vect_external_def
3797 || arginfo
[i
].linear_step
)
3800 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3801 if (arginfo
[i
].dt
!= vect_constant_def
3802 && arginfo
[i
].dt
!= vect_external_def
)
3805 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3806 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3807 if (arginfo
[i
].dt
== vect_constant_def
3808 || arginfo
[i
].dt
== vect_external_def
3809 || (arginfo
[i
].linear_step
3810 != n
->simdclone
->args
[i
].linear_step
))
3813 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3814 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3815 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3816 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3817 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3818 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3822 case SIMD_CLONE_ARG_TYPE_MASK
:
3825 if (i
== (size_t) -1)
3827 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3832 if (arginfo
[i
].align
)
3833 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3834 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3836 if (i
== (size_t) -1)
3838 if (bestn
== NULL
|| this_badness
< badness
)
3841 badness
= this_badness
;
3848 for (i
= 0; i
< nargs
; i
++)
3849 if ((arginfo
[i
].dt
== vect_constant_def
3850 || arginfo
[i
].dt
== vect_external_def
)
3851 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3854 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3856 if (arginfo
[i
].vectype
== NULL
3857 || (simd_clone_subparts (arginfo
[i
].vectype
)
3858 > bestn
->simdclone
->simdlen
))
3862 fndecl
= bestn
->decl
;
3863 nunits
= bestn
->simdclone
->simdlen
;
3864 ncopies
= vf
/ nunits
;
3866 /* If the function isn't const, only allow it in simd loops where user
3867 has asserted that at least nunits consecutive iterations can be
3868 performed using SIMD instructions. */
3869 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3870 && gimple_vuse (stmt
))
3873 /* Sanity check: make sure that at least one copy of the vectorized stmt
3874 needs to be generated. */
3875 gcc_assert (ncopies
>= 1);
3877 if (!vec_stmt
) /* transformation not required. */
3879 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3880 for (i
= 0; i
< nargs
; i
++)
3881 if ((bestn
->simdclone
->args
[i
].arg_type
3882 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3883 || (bestn
->simdclone
->args
[i
].arg_type
3884 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3886 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3888 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3889 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3890 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3891 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3892 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3893 tree sll
= arginfo
[i
].simd_lane_linear
3894 ? boolean_true_node
: boolean_false_node
;
3895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3897 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3898 if (dump_enabled_p ())
3899 dump_printf_loc (MSG_NOTE
, vect_location
,
3900 "=== vectorizable_simd_clone_call ===\n");
3901 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
/* Transformation phase: emit the vectorized call(s).  */
3907 if (dump_enabled_p ())
3908 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3911 scalar_dest
= gimple_call_lhs (stmt
);
3912 vec_dest
= NULL_TREE
;
3917 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3918 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3919 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3922 rtype
= TREE_TYPE (ratype
);
3926 prev_stmt_info
= NULL
;
3927 for (j
= 0; j
< ncopies
; ++j
)
3929 /* Build argument list for the vectorized call. */
3931 vargs
.create (nargs
);
3935 for (i
= 0; i
< nargs
; i
++)
3937 unsigned int k
, l
, m
, o
;
3939 op
= gimple_call_arg (stmt
, i
);
3940 switch (bestn
->simdclone
->args
[i
].arg_type
)
3942 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3943 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3944 o
= nunits
/ simd_clone_subparts (atype
);
3945 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3947 if (simd_clone_subparts (atype
)
3948 < simd_clone_subparts (arginfo
[i
].vectype
))
3950 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3951 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
3952 / simd_clone_subparts (atype
));
3953 gcc_assert ((k
& (k
- 1)) == 0);
3956 = vect_get_vec_def_for_operand (op
, stmt
);
3959 vec_oprnd0
= arginfo
[i
].op
;
3960 if ((m
& (k
- 1)) == 0)
3962 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3965 arginfo
[i
].op
= vec_oprnd0
;
3967 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3969 bitsize_int ((m
& (k
- 1)) * prec
));
3971 = gimple_build_assign (make_ssa_name (atype
),
3973 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3974 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3978 k
= (simd_clone_subparts (atype
)
3979 / simd_clone_subparts (arginfo
[i
].vectype
));
3980 gcc_assert ((k
& (k
- 1)) == 0);
3981 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3983 vec_alloc (ctor_elts
, k
);
3986 for (l
= 0; l
< k
; l
++)
3988 if (m
== 0 && l
== 0)
3990 = vect_get_vec_def_for_operand (op
, stmt
);
3993 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3995 arginfo
[i
].op
= vec_oprnd0
;
3998 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4002 vargs
.safe_push (vec_oprnd0
);
4005 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4007 = gimple_build_assign (make_ssa_name (atype
),
4009 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4010 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4015 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4016 vargs
.safe_push (op
);
4018 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4019 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4024 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4029 edge pe
= loop_preheader_edge (loop
);
4030 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4031 gcc_assert (!new_bb
);
4033 if (arginfo
[i
].simd_lane_linear
)
4035 vargs
.safe_push (arginfo
[i
].op
);
4038 tree phi_res
= copy_ssa_name (op
);
4039 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4040 set_vinfo_for_stmt (new_phi
,
4041 new_stmt_vec_info (new_phi
, loop_vinfo
));
4042 add_phi_arg (new_phi
, arginfo
[i
].op
,
4043 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4045 = POINTER_TYPE_P (TREE_TYPE (op
))
4046 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4047 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4048 ? sizetype
: TREE_TYPE (op
);
4050 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4052 tree tcst
= wide_int_to_tree (type
, cst
);
4053 tree phi_arg
= copy_ssa_name (op
);
4055 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4056 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4057 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4058 set_vinfo_for_stmt (new_stmt
,
4059 new_stmt_vec_info (new_stmt
, loop_vinfo
));
4060 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4062 arginfo
[i
].op
= phi_res
;
4063 vargs
.safe_push (phi_res
);
4068 = POINTER_TYPE_P (TREE_TYPE (op
))
4069 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4070 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4071 ? sizetype
: TREE_TYPE (op
);
4073 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4075 tree tcst
= wide_int_to_tree (type
, cst
);
4076 new_temp
= make_ssa_name (TREE_TYPE (op
));
4077 new_stmt
= gimple_build_assign (new_temp
, code
,
4078 arginfo
[i
].op
, tcst
);
4079 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4080 vargs
.safe_push (new_temp
);
4083 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4084 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4085 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4086 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4087 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4088 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
/* All arguments are ready: emit the call to the selected clone.  */
4094 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
4097 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4099 new_temp
= create_tmp_var (ratype
);
4100 else if (simd_clone_subparts (vectype
)
4101 == simd_clone_subparts (rtype
))
4102 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4104 new_temp
= make_ssa_name (rtype
, new_stmt
);
4105 gimple_call_set_lhs (new_stmt
, new_temp
);
4107 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* The clone's return value may be an array (RATYPE) or a vector
   whose subparts differ from VECTYPE; repackage as needed below.  */
4111 if (simd_clone_subparts (vectype
) < nunits
)
4114 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4115 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4116 k
= nunits
/ simd_clone_subparts (vectype
);
4117 gcc_assert ((k
& (k
- 1)) == 0);
4118 for (l
= 0; l
< k
; l
++)
4123 t
= build_fold_addr_expr (new_temp
);
4124 t
= build2 (MEM_REF
, vectype
, t
,
4125 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4128 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4129 bitsize_int (prec
), bitsize_int (l
* prec
));
4131 = gimple_build_assign (make_ssa_name (vectype
), t
);
4132 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4133 if (j
== 0 && l
== 0)
4134 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4136 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4138 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4142 vect_clobber_variable (stmt
, gsi
, new_temp
);
4145 else if (simd_clone_subparts (vectype
) > nunits
)
4147 unsigned int k
= (simd_clone_subparts (vectype
)
4148 / simd_clone_subparts (rtype
));
4149 gcc_assert ((k
& (k
- 1)) == 0);
4150 if ((j
& (k
- 1)) == 0)
4151 vec_alloc (ret_ctor_elts
, k
);
4154 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4155 for (m
= 0; m
< o
; m
++)
4157 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4158 size_int (m
), NULL_TREE
, NULL_TREE
);
4160 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4161 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4162 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4163 gimple_assign_lhs (new_stmt
));
4165 vect_clobber_variable (stmt
, gsi
, new_temp
);
4168 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4169 if ((j
& (k
- 1)) != k
- 1)
4171 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4173 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4174 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4176 if ((unsigned) j
== k
- 1)
4177 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4179 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4181 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4186 tree t
= build_fold_addr_expr (new_temp
);
4187 t
= build2 (MEM_REF
, vectype
, t
,
4188 build_int_cst (TREE_TYPE (t
), 0));
4190 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4191 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4192 vect_clobber_variable (stmt
, gsi
, new_temp
);
4197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4201 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4206 /* The call in STMT might prevent it from being removed in dce.
4207 We however cannot remove it here, due to the way the ssa name
4208 it defines is mapped to the new definition. So just replace
4209 rhs of the statement with something harmless. */
4216 type
= TREE_TYPE (scalar_dest
);
4217 if (is_pattern_stmt_p (stmt_info
))
4218 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
4220 lhs
= gimple_call_lhs (stmt
);
4221 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4224 new_stmt
= gimple_build_nop ();
4225 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4226 set_vinfo_for_stmt (stmt
, NULL
);
4227 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4228 gsi_replace (gsi
, new_stmt
, true);
4229 unlink_stmt_vdef (stmt
);
4235 /* Function vect_gen_widened_results_half
4237 Create a vector stmt whose code, type, number of arguments, and result
4238 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4239 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4240 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4241 needs to be created (DECL is a function-decl of a target-builtin).
4242 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): parts of the declaration (return type and the DECL and
   STMT parameters referenced in the body) are missing from this
   extraction -- confirm against the complete source.  */
4245 vect_gen_widened_results_half (enum tree_code code
,
4247 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4248 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4254 /* Generate half of the widened result: */
4255 if (code
== CALL_EXPR
)
4257 /* Target specific support */
4258 if (op_type
== binary_op
)
4259 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4261 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4262 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4263 gimple_call_set_lhs (new_stmt
, new_temp
);
4267 /* Generic support */
4268 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4269 if (op_type
!= binary_op
)
4271 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4272 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4273 gimple_assign_set_lhs (new_stmt
, new_temp
);
4275 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4281 /* Get vectorized definitions for loop-based vectorization. For the first
4282 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4283 scalar operand), and for the rest we get a copy with
4284 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4285 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4286 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): the local declaration of vec_oprnd, the update of
   *OPRND, and the condition guarding the recursive call appear to be
   missing from this extraction -- confirm against the complete
   source.  */
4289 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
4290 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4294 /* Get first vector operand. */
4295 /* All the vector operands except the very first one (that is scalar oprnd)
4297 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4298 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
4300 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
4302 vec_oprnds
->quick_push (vec_oprnd
);
4304 /* Get second vector operand. */
4305 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
4306 vec_oprnds
->quick_push (vec_oprnd
);
4310 /* For conversion in multiple steps, continue to get operands
4313 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
4317 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4318 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): the comment tail, several parameter lines (e.g. the
   VEC_DSTS argument used below) and braces are missing from this
   extraction -- confirm against the complete source.  */
4322 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4323 int multi_step_cvt
, gimple
*stmt
,
4325 gimple_stmt_iterator
*gsi
,
4326 slp_tree slp_node
, enum tree_code code
,
4327 stmt_vec_info
*prev_stmt_info
)
4330 tree vop0
, vop1
, new_tmp
, vec_dest
;
4332 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4334 vec_dest
= vec_dsts
.pop ();
/* Combine pairs of operands with the demotion CODE.  */
4336 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4338 /* Create demotion operation. */
4339 vop0
= (*vec_oprnds
)[i
];
4340 vop1
= (*vec_oprnds
)[i
+ 1];
4341 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4342 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4343 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4344 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4347 /* Store the resulting vector for next recursive call. */
4348 (*vec_oprnds
)[i
/2] = new_tmp
;
4351 /* This is the last step of the conversion sequence. Store the
4352 vectors in SLP_NODE or in vector info of the scalar statement
4353 (or in STMT_VINFO_RELATED_STMT chain). */
4355 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4358 if (!*prev_stmt_info
)
4359 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4361 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
4363 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4368 /* For multi-step demotion operations we first generate demotion operations
4369 from the source type to the intermediate types, and then combine the
4370 results (stored in VEC_OPRNDS) in demotion operation to the destination
4374 /* At each level of recursion we have half of the operands we had at the
4376 vec_oprnds
->truncate ((i
+1)/2);
4377 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4378 stmt
, vec_dsts
, gsi
, slp_node
,
4379 VEC_PACK_TRUNC_EXPR
,
4383 vec_dsts
.quick_push (vec_dest
);
4387 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4388 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4389 the resulting vectors and call the function recursively. */
4392 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4393 vec
<tree
> *vec_oprnds1
,
4394 gimple
*stmt
, tree vec_dest
,
4395 gimple_stmt_iterator
*gsi
,
4396 enum tree_code code1
,
4397 enum tree_code code2
, tree decl1
,
4398 tree decl2
, int op_type
)
4401 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4402 gimple
*new_stmt1
, *new_stmt2
;
4403 vec
<tree
> vec_tmp
= vNULL
;
4405 vec_tmp
.create (vec_oprnds0
->length () * 2);
4406 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4408 if (op_type
== binary_op
)
4409 vop1
= (*vec_oprnds1
)[i
];
4413 /* Generate the two halves of promotion operation. */
4414 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4415 op_type
, vec_dest
, gsi
, stmt
);
4416 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4417 op_type
, vec_dest
, gsi
, stmt
);
4418 if (is_gimple_call (new_stmt1
))
4420 new_tmp1
= gimple_call_lhs (new_stmt1
);
4421 new_tmp2
= gimple_call_lhs (new_stmt2
);
4425 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4426 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4429 /* Store the results for the next step. */
4430 vec_tmp
.quick_push (new_tmp1
);
4431 vec_tmp
.quick_push (new_tmp2
);
4434 vec_oprnds0
->release ();
4435 *vec_oprnds0
= vec_tmp
;
4439 /* Check if STMT performs a conversion operation, that can be vectorized.
4440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4441 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4442 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4445 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4446 gimple
**vec_stmt
, slp_tree slp_node
)
4450 tree op0
, op1
= NULL_TREE
;
4451 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4452 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4453 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4454 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4455 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4456 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4459 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4461 gimple
*new_stmt
= NULL
;
4462 stmt_vec_info prev_stmt_info
;
4463 poly_uint64 nunits_in
;
4464 poly_uint64 nunits_out
;
4465 tree vectype_out
, vectype_in
;
4467 tree lhs_type
, rhs_type
;
4468 enum { NARROW
, NONE
, WIDEN
} modifier
;
4469 vec
<tree
> vec_oprnds0
= vNULL
;
4470 vec
<tree
> vec_oprnds1
= vNULL
;
4472 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4473 vec_info
*vinfo
= stmt_info
->vinfo
;
4474 int multi_step_cvt
= 0;
4475 vec
<tree
> interm_types
= vNULL
;
4476 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4478 unsigned short fltsz
;
4480 /* Is STMT a vectorizable conversion? */
4482 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4485 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4489 if (!is_gimple_assign (stmt
))
4492 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4495 code
= gimple_assign_rhs_code (stmt
);
4496 if (!CONVERT_EXPR_CODE_P (code
)
4497 && code
!= FIX_TRUNC_EXPR
4498 && code
!= FLOAT_EXPR
4499 && code
!= WIDEN_MULT_EXPR
4500 && code
!= WIDEN_LSHIFT_EXPR
)
4503 op_type
= TREE_CODE_LENGTH (code
);
4505 /* Check types of lhs and rhs. */
4506 scalar_dest
= gimple_assign_lhs (stmt
);
4507 lhs_type
= TREE_TYPE (scalar_dest
);
4508 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4510 op0
= gimple_assign_rhs1 (stmt
);
4511 rhs_type
= TREE_TYPE (op0
);
4513 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4514 && !((INTEGRAL_TYPE_P (lhs_type
)
4515 && INTEGRAL_TYPE_P (rhs_type
))
4516 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4517 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4520 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4521 && ((INTEGRAL_TYPE_P (lhs_type
)
4522 && !type_has_mode_precision_p (lhs_type
))
4523 || (INTEGRAL_TYPE_P (rhs_type
)
4524 && !type_has_mode_precision_p (rhs_type
))))
4526 if (dump_enabled_p ())
4527 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4528 "type conversion to/from bit-precision unsupported."
4533 /* Check the operands of the operation. */
4534 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4536 if (dump_enabled_p ())
4537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4538 "use not simple.\n");
4541 if (op_type
== binary_op
)
4545 op1
= gimple_assign_rhs2 (stmt
);
4546 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4547 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4549 if (CONSTANT_CLASS_P (op0
))
4550 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
4552 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
4556 if (dump_enabled_p ())
4557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4558 "use not simple.\n");
4563 /* If op0 is an external or constant defs use a vector type of
4564 the same size as the output vector type. */
4566 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4568 gcc_assert (vectype_in
);
4571 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4574 "no vectype for scalar type ");
4575 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4576 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4582 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4583 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4585 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4588 "can't convert between boolean and non "
4590 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4591 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4597 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4598 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4599 if (known_eq (nunits_out
, nunits_in
))
4601 else if (multiple_p (nunits_out
, nunits_in
))
4605 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4609 /* Multiple types in SLP are handled by creating the appropriate number of
4610 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4614 else if (modifier
== NARROW
)
4615 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4617 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4619 /* Sanity check: make sure that at least one copy of the vectorized stmt
4620 needs to be generated. */
4621 gcc_assert (ncopies
>= 1);
4623 bool found_mode
= false;
4624 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4625 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4626 opt_scalar_mode rhs_mode_iter
;
4628 /* Supportable by target? */
4632 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4634 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4639 if (dump_enabled_p ())
4640 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4641 "conversion not supported by target.\n");
4645 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4646 &code1
, &code2
, &multi_step_cvt
,
4649 /* Binary widening operation can only be supported directly by the
4651 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4655 if (code
!= FLOAT_EXPR
4656 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4659 fltsz
= GET_MODE_SIZE (lhs_mode
);
4660 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4662 rhs_mode
= rhs_mode_iter
.require ();
4663 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4667 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4668 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4669 if (cvt_type
== NULL_TREE
)
4672 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4674 if (!supportable_convert_operation (code
, vectype_out
,
4675 cvt_type
, &decl1
, &codecvt1
))
4678 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4679 cvt_type
, &codecvt1
,
4680 &codecvt2
, &multi_step_cvt
,
4684 gcc_assert (multi_step_cvt
== 0);
4686 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4687 vectype_in
, &code1
, &code2
,
4688 &multi_step_cvt
, &interm_types
))
4698 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4699 codecvt2
= ERROR_MARK
;
4703 interm_types
.safe_push (cvt_type
);
4704 cvt_type
= NULL_TREE
;
4709 gcc_assert (op_type
== unary_op
);
4710 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4711 &code1
, &multi_step_cvt
,
4715 if (code
!= FIX_TRUNC_EXPR
4716 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4720 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4721 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4722 if (cvt_type
== NULL_TREE
)
4724 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4727 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4728 &code1
, &multi_step_cvt
,
4737 if (!vec_stmt
) /* transformation not required. */
4739 if (dump_enabled_p ())
4740 dump_printf_loc (MSG_NOTE
, vect_location
,
4741 "=== vectorizable_conversion ===\n");
4742 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4744 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4746 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
4748 else if (modifier
== NARROW
)
4750 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4752 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4756 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4758 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
4760 interm_types
.release ();
4765 if (dump_enabled_p ())
4766 dump_printf_loc (MSG_NOTE
, vect_location
,
4767 "transform conversion. ncopies = %d.\n", ncopies
);
4769 if (op_type
== binary_op
)
4771 if (CONSTANT_CLASS_P (op0
))
4772 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4773 else if (CONSTANT_CLASS_P (op1
))
4774 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4777 /* In case of multi-step conversion, we first generate conversion operations
4778 to the intermediate types, and then from that types to the final one.
4779 We create vector destinations for the intermediate type (TYPES) received
4780 from supportable_*_operation, and store them in the correct order
4781 for future use in vect_create_vectorized_*_stmts (). */
4782 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4783 vec_dest
= vect_create_destination_var (scalar_dest
,
4784 (cvt_type
&& modifier
== WIDEN
)
4785 ? cvt_type
: vectype_out
);
4786 vec_dsts
.quick_push (vec_dest
);
4790 for (i
= interm_types
.length () - 1;
4791 interm_types
.iterate (i
, &intermediate_type
); i
--)
4793 vec_dest
= vect_create_destination_var (scalar_dest
,
4795 vec_dsts
.quick_push (vec_dest
);
4800 vec_dest
= vect_create_destination_var (scalar_dest
,
4802 ? vectype_out
: cvt_type
);
4806 if (modifier
== WIDEN
)
4808 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4809 if (op_type
== binary_op
)
4810 vec_oprnds1
.create (1);
4812 else if (modifier
== NARROW
)
4813 vec_oprnds0
.create (
4814 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4816 else if (code
== WIDEN_LSHIFT_EXPR
)
4817 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4820 prev_stmt_info
= NULL
;
4824 for (j
= 0; j
< ncopies
; j
++)
4827 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4829 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4831 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4833 /* Arguments are ready, create the new vector stmt. */
4834 if (code1
== CALL_EXPR
)
4836 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4837 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4838 gimple_call_set_lhs (new_stmt
, new_temp
);
4842 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4843 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4844 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4845 gimple_assign_set_lhs (new_stmt
, new_temp
);
4848 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4850 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4853 if (!prev_stmt_info
)
4854 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4856 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4857 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4864 /* In case the vectorization factor (VF) is bigger than the number
4865 of elements that we can fit in a vectype (nunits), we have to
4866 generate more than one vector stmt - i.e - we need to "unroll"
4867 the vector stmt by a factor VF/nunits. */
4868 for (j
= 0; j
< ncopies
; j
++)
4875 if (code
== WIDEN_LSHIFT_EXPR
)
4880 /* Store vec_oprnd1 for every vector stmt to be created
4881 for SLP_NODE. We check during the analysis that all
4882 the shift arguments are the same. */
4883 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4884 vec_oprnds1
.quick_push (vec_oprnd1
);
4886 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4890 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4891 &vec_oprnds1
, slp_node
);
4895 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4896 vec_oprnds0
.quick_push (vec_oprnd0
);
4897 if (op_type
== binary_op
)
4899 if (code
== WIDEN_LSHIFT_EXPR
)
4902 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4903 vec_oprnds1
.quick_push (vec_oprnd1
);
4909 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4910 vec_oprnds0
.truncate (0);
4911 vec_oprnds0
.quick_push (vec_oprnd0
);
4912 if (op_type
== binary_op
)
4914 if (code
== WIDEN_LSHIFT_EXPR
)
4917 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4919 vec_oprnds1
.truncate (0);
4920 vec_oprnds1
.quick_push (vec_oprnd1
);
4924 /* Arguments are ready. Create the new vector stmts. */
4925 for (i
= multi_step_cvt
; i
>= 0; i
--)
4927 tree this_dest
= vec_dsts
[i
];
4928 enum tree_code c1
= code1
, c2
= code2
;
4929 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4934 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4936 stmt
, this_dest
, gsi
,
4937 c1
, c2
, decl1
, decl2
,
4941 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4945 if (codecvt1
== CALL_EXPR
)
4947 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4948 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4949 gimple_call_set_lhs (new_stmt
, new_temp
);
4953 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4954 new_temp
= make_ssa_name (vec_dest
);
4955 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4959 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4962 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4965 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4968 if (!prev_stmt_info
)
4969 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4971 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4972 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4977 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4981 /* In case the vectorization factor (VF) is bigger than the number
4982 of elements that we can fit in a vectype (nunits), we have to
4983 generate more than one vector stmt - i.e - we need to "unroll"
4984 the vector stmt by a factor VF/nunits. */
4985 for (j
= 0; j
< ncopies
; j
++)
4989 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4993 vec_oprnds0
.truncate (0);
4994 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4995 vect_pow2 (multi_step_cvt
) - 1);
4998 /* Arguments are ready. Create the new vector stmts. */
5000 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5002 if (codecvt1
== CALL_EXPR
)
5004 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5005 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5006 gimple_call_set_lhs (new_stmt
, new_temp
);
5010 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5011 new_temp
= make_ssa_name (vec_dest
);
5012 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
5016 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5017 vec_oprnds0
[i
] = new_temp
;
5020 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5021 stmt
, vec_dsts
, gsi
,
5026 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5030 vec_oprnds0
.release ();
5031 vec_oprnds1
.release ();
5032 interm_types
.release ();
5038 /* Function vectorizable_assignment.
5040 Check if STMT performs an assignment (copy) that can be vectorized.
5041 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5042 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5046 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5047 gimple
**vec_stmt
, slp_tree slp_node
)
5052 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5053 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5056 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5060 vec
<tree
> vec_oprnds
= vNULL
;
5062 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5063 vec_info
*vinfo
= stmt_info
->vinfo
;
5064 gimple
*new_stmt
= NULL
;
5065 stmt_vec_info prev_stmt_info
= NULL
;
5066 enum tree_code code
;
5069 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5072 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5076 /* Is vectorizable assignment? */
5077 if (!is_gimple_assign (stmt
))
5080 scalar_dest
= gimple_assign_lhs (stmt
);
5081 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5084 code
= gimple_assign_rhs_code (stmt
);
5085 if (gimple_assign_single_p (stmt
)
5086 || code
== PAREN_EXPR
5087 || CONVERT_EXPR_CODE_P (code
))
5088 op
= gimple_assign_rhs1 (stmt
);
5092 if (code
== VIEW_CONVERT_EXPR
)
5093 op
= TREE_OPERAND (op
, 0);
5095 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5096 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5098 /* Multiple types in SLP are handled by creating the appropriate number of
5099 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5104 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5106 gcc_assert (ncopies
>= 1);
5108 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5112 "use not simple.\n");
5116 /* We can handle NOP_EXPR conversions that do not change the number
5117 of elements or the vector size. */
5118 if ((CONVERT_EXPR_CODE_P (code
)
5119 || code
== VIEW_CONVERT_EXPR
)
5121 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5122 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5123 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5126 /* We do not handle bit-precision changes. */
5127 if ((CONVERT_EXPR_CODE_P (code
)
5128 || code
== VIEW_CONVERT_EXPR
)
5129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5130 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5131 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5132 /* But a conversion that does not change the bit-pattern is ok. */
5133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5134 > TYPE_PRECISION (TREE_TYPE (op
)))
5135 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5136 /* Conversion between boolean types of different sizes is
5137 a simple assignment in case their vectypes are same
5139 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5140 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5142 if (dump_enabled_p ())
5143 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5144 "type conversion to/from bit-precision "
5149 if (!vec_stmt
) /* transformation not required. */
5151 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5152 if (dump_enabled_p ())
5153 dump_printf_loc (MSG_NOTE
, vect_location
,
5154 "=== vectorizable_assignment ===\n");
5156 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5161 if (dump_enabled_p ())
5162 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5165 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5168 for (j
= 0; j
< ncopies
; j
++)
5172 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
5174 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
5176 /* Arguments are ready. create the new vector stmt. */
5177 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5179 if (CONVERT_EXPR_CODE_P (code
)
5180 || code
== VIEW_CONVERT_EXPR
)
5181 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5182 new_stmt
= gimple_build_assign (vec_dest
, vop
);
5183 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5184 gimple_assign_set_lhs (new_stmt
, new_temp
);
5185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5187 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5194 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5196 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5198 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5201 vec_oprnds
.release ();
5206 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5207 either as shift by a scalar or by a vector. */
5210 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
5213 machine_mode vec_mode
;
5218 vectype
= get_vectype_for_scalar_type (scalar_type
);
5222 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5224 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5226 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5228 || (optab_handler (optab
, TYPE_MODE (vectype
))
5229 == CODE_FOR_nothing
))
5233 vec_mode
= TYPE_MODE (vectype
);
5234 icode
= (int) optab_handler (optab
, vec_mode
);
5235 if (icode
== CODE_FOR_nothing
)
5242 /* Function vectorizable_shift.
5244 Check if STMT performs a shift operation that can be vectorized.
5245 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5246 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5247 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5250 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5251 gimple
**vec_stmt
, slp_tree slp_node
)
5255 tree op0
, op1
= NULL
;
5256 tree vec_oprnd1
= NULL_TREE
;
5257 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5259 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5260 enum tree_code code
;
5261 machine_mode vec_mode
;
5265 machine_mode optab_op2_mode
;
5267 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5269 gimple
*new_stmt
= NULL
;
5270 stmt_vec_info prev_stmt_info
;
5271 poly_uint64 nunits_in
;
5272 poly_uint64 nunits_out
;
5277 vec
<tree
> vec_oprnds0
= vNULL
;
5278 vec
<tree
> vec_oprnds1
= vNULL
;
5281 bool scalar_shift_arg
= true;
5282 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5283 vec_info
*vinfo
= stmt_info
->vinfo
;
5285 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5288 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5292 /* Is STMT a vectorizable binary/unary operation? */
5293 if (!is_gimple_assign (stmt
))
5296 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5299 code
= gimple_assign_rhs_code (stmt
);
5301 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5302 || code
== RROTATE_EXPR
))
5305 scalar_dest
= gimple_assign_lhs (stmt
);
5306 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5307 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5311 "bit-precision shifts not supported.\n");
5315 op0
= gimple_assign_rhs1 (stmt
);
5316 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5318 if (dump_enabled_p ())
5319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5320 "use not simple.\n");
5323 /* If op0 is an external or constant def use a vector type with
5324 the same size as the output vector type. */
5326 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5328 gcc_assert (vectype
);
5331 if (dump_enabled_p ())
5332 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5333 "no vectype for scalar type\n");
5337 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5338 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5339 if (maybe_ne (nunits_out
, nunits_in
))
5342 op1
= gimple_assign_rhs2 (stmt
);
5343 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
5345 if (dump_enabled_p ())
5346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5347 "use not simple.\n");
5351 /* Multiple types in SLP are handled by creating the appropriate number of
5352 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5357 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5359 gcc_assert (ncopies
>= 1);
5361 /* Determine whether the shift amount is a vector, or scalar. If the
5362 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5364 if ((dt
[1] == vect_internal_def
5365 || dt
[1] == vect_induction_def
)
5367 scalar_shift_arg
= false;
5368 else if (dt
[1] == vect_constant_def
5369 || dt
[1] == vect_external_def
5370 || dt
[1] == vect_internal_def
)
5372 /* In SLP, need to check whether the shift count is the same,
5373 in loops if it is a constant or invariant, it is always
5377 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5380 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
5381 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5382 scalar_shift_arg
= false;
5385 /* If the shift amount is computed by a pattern stmt we cannot
5386 use the scalar amount directly thus give up and use a vector
5388 if (dt
[1] == vect_internal_def
)
5390 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
5391 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
5392 scalar_shift_arg
= false;
5397 if (dump_enabled_p ())
5398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5399 "operand mode requires invariant argument.\n");
5403 /* Vector shifted by vector. */
5404 if (!scalar_shift_arg
)
5406 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_NOTE
, vect_location
,
5409 "vector/vector shift/rotate found.\n");
5412 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5413 if (op1_vectype
== NULL_TREE
5414 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5418 "unusable type for last operand in"
5419 " vector/vector shift/rotate.\n");
5423 /* See if the machine has a vector shifted by scalar insn and if not
5424 then see if it has a vector shifted by vector insn. */
5427 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5429 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5431 if (dump_enabled_p ())
5432 dump_printf_loc (MSG_NOTE
, vect_location
,
5433 "vector/scalar shift/rotate found.\n");
5437 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5439 && (optab_handler (optab
, TYPE_MODE (vectype
))
5440 != CODE_FOR_nothing
))
5442 scalar_shift_arg
= false;
5444 if (dump_enabled_p ())
5445 dump_printf_loc (MSG_NOTE
, vect_location
,
5446 "vector/vector shift/rotate found.\n");
5448 /* Unlike the other binary operators, shifts/rotates have
5449 the rhs being int, instead of the same type as the lhs,
5450 so make sure the scalar is the right type if we are
5451 dealing with vectors of long long/long/short/char. */
5452 if (dt
[1] == vect_constant_def
)
5453 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5454 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5458 && TYPE_MODE (TREE_TYPE (vectype
))
5459 != TYPE_MODE (TREE_TYPE (op1
)))
5461 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5463 "unusable type for last operand in"
5464 " vector/vector shift/rotate.\n");
5467 if (vec_stmt
&& !slp_node
)
5469 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5470 op1
= vect_init_vector (stmt
, op1
,
5471 TREE_TYPE (vectype
), NULL
);
5478 /* Supportable by target? */
5481 if (dump_enabled_p ())
5482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5486 vec_mode
= TYPE_MODE (vectype
);
5487 icode
= (int) optab_handler (optab
, vec_mode
);
5488 if (icode
== CODE_FOR_nothing
)
5490 if (dump_enabled_p ())
5491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5492 "op not supported by target.\n");
5493 /* Check only during analysis. */
5494 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5496 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5498 if (dump_enabled_p ())
5499 dump_printf_loc (MSG_NOTE
, vect_location
,
5500 "proceeding using word mode.\n");
5503 /* Worthwhile without SIMD support? Check only during analysis. */
5505 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5506 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5510 "not worthwhile without SIMD support.\n");
5514 if (!vec_stmt
) /* transformation not required. */
5516 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_NOTE
, vect_location
,
5519 "=== vectorizable_shift ===\n");
5521 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_NOTE
, vect_location
,
5529 "transform binary/unary operation.\n");
5532 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5534 prev_stmt_info
= NULL
;
5535 for (j
= 0; j
< ncopies
; j
++)
5540 if (scalar_shift_arg
)
5542 /* Vector shl and shr insn patterns can be defined with scalar
5543 operand 2 (shift operand). In this case, use constant or loop
5544 invariant op1 directly, without extending it to vector mode
5546 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5547 if (!VECTOR_MODE_P (optab_op2_mode
))
5549 if (dump_enabled_p ())
5550 dump_printf_loc (MSG_NOTE
, vect_location
,
5551 "operand 1 using scalar mode.\n");
5553 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5554 vec_oprnds1
.quick_push (vec_oprnd1
);
5557 /* Store vec_oprnd1 for every vector stmt to be created
5558 for SLP_NODE. We check during the analysis that all
5559 the shift arguments are the same.
5560 TODO: Allow different constants for different vector
5561 stmts generated for an SLP instance. */
5562 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5563 vec_oprnds1
.quick_push (vec_oprnd1
);
5568 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5569 (a special case for certain kind of vector shifts); otherwise,
5570 operand 1 should be of a vector type (the usual case). */
5572 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5575 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5579 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5581 /* Arguments are ready. Create the new vector stmt. */
5582 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5584 vop1
= vec_oprnds1
[i
];
5585 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5586 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5587 gimple_assign_set_lhs (new_stmt
, new_temp
);
5588 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5590 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5597 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5599 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5600 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5603 vec_oprnds0
.release ();
5604 vec_oprnds1
.release ();
5610 /* Function vectorizable_operation.
5612 Check if STMT performs a binary, unary or ternary operation that can
5614 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5615 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5616 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this listing is a garbled extraction -- every original
   source line is split across several physical lines, the original
   file's line numbers (5610, 5619, ...) are fused into the text, and
   the gaps in that numbering show that many lines (braces, `return'
   statements, `else' keywords, continuation arguments) were dropped.
   Do not compile from this listing; restore the function from the
   upstream file.  The comments added below annotate only the logic
   that is visible here.  */
5619 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5620 gimple
**vec_stmt
, slp_tree slp_node
)
/* Local state: the scalar operands (op0/op1/op2), their definition
   kinds (dt[3]), the collected vectorized operands (vec_oprnds0/1/2),
   and bookkeeping for chaining multiple vector copies together.  */
5624 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5625 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5627 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5628 enum tree_code code
, orig_code
;
5629 machine_mode vec_mode
;
5633 bool target_support_p
;
5635 enum vect_def_type dt
[3]
5636 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5638 gimple
*new_stmt
= NULL
;
5639 stmt_vec_info prev_stmt_info
;
5640 poly_uint64 nunits_in
;
5641 poly_uint64 nunits_out
;
5645 vec
<tree
> vec_oprnds0
= vNULL
;
5646 vec
<tree
> vec_oprnds1
= vNULL
;
5647 vec
<tree
> vec_oprnds2
= vNULL
;
5648 tree vop0
, vop1
, vop2
;
5649 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5650 vec_info
*vinfo
= stmt_info
->vinfo
;
/* Early-out filters: the stmt must be relevant (or we must be doing
   basic-block SLP), an internal def, a GIMPLE assignment to an
   SSA name.  The `return false' lines are missing from this
   extraction but implied by the numbering gaps.  */
5652 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5655 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5659 /* Is STMT a vectorizable binary/unary operation? */
5660 if (!is_gimple_assign (stmt
))
5663 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Keep the original code in ORIG_CODE: POINTER_PLUS_EXPR and
   POINTER_DIFF_EXPR are canonicalized below (to plain PLUS/MINUS --
   the assignments themselves fell out of this extraction), but the
   POINTER_DIFF_EXPR case still needs special handling at transform
   time (see vec_cvt_dest below).  */
5666 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5668 /* For pointer addition and subtraction, we should use the normal
5669 plus and minus for the vector operation. */
5670 if (code
== POINTER_PLUS_EXPR
)
5672 if (code
== POINTER_DIFF_EXPR
)
5675 /* Support only unary or binary operations. */
5676 op_type
= TREE_CODE_LENGTH (code
);
5677 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5679 if (dump_enabled_p ())
5680 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5681 "num. args = %d (not unary/binary/ternary op).\n",
5686 scalar_dest
= gimple_assign_lhs (stmt
);
5687 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5689 /* Most operations cannot handle bit-precision types without extra
5691 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5692 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5693 /* Exception are bitwise binary operations. */
5694 && code
!= BIT_IOR_EXPR
5695 && code
!= BIT_XOR_EXPR
5696 && code
!= BIT_AND_EXPR
)
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5700 "bit-precision arithmetic not supported.\n");
/* Analyze operand 0 and derive the input vector type VECTYPE from it
   (vect_is_simple_use fills it in for internal defs).  */
5704 op0
= gimple_assign_rhs1 (stmt
);
5705 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5709 "use not simple.\n");
5712 /* If op0 is an external or constant def use a vector type with
5713 the same size as the output vector type. */
5716 /* For boolean type we cannot determine vectype by
5717 invariant value (don't know whether it is a vector
5718 of booleans or vector of integers). We use output
5719 vectype because operations on boolean don't change
5721 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5723 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5727 "not supported operation on bool value.\n");
5730 vectype
= vectype_out
;
5733 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5736 gcc_assert (vectype
);
5739 if (dump_enabled_p ())
5741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5742 "no vectype for scalar type ");
5743 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5745 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Input and output vector types must agree on the number of
   subparts (narrowing/widening ops are handled elsewhere).  */
5751 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5752 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5753 if (maybe_ne (nunits_out
, nunits_in
))
/* Analyze the remaining operands (rhs2 for binary/ternary, rhs3 for
   ternary), recording their def kinds in dt[1]/dt[2].  */
5756 if (op_type
== binary_op
|| op_type
== ternary_op
)
5758 op1
= gimple_assign_rhs2 (stmt
);
5759 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5763 "use not simple.\n");
5767 if (op_type
== ternary_op
)
5769 op2
= gimple_assign_rhs3 (stmt
);
5770 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
5772 if (dump_enabled_p ())
5773 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5774 "use not simple.\n");
5779 /* Multiple types in SLP are handled by creating the appropriate number of
5780 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5785 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
)
;
5787 gcc_assert (ncopies
>= 1);
5789 /* Shifts are handled in vectorizable_shift (). */
5790 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5791 || code
== RROTATE_EXPR
)
5794 /* Supportable by target? */
/* Target-support check: MULT_HIGHPART_EXPR goes through
   can_mult_highpart_p; everything else is looked up via
   optab_for_tree_code / optab_handler on the vector mode.  */
5796 vec_mode
= TYPE_MODE (vectype
);
5797 if (code
== MULT_HIGHPART_EXPR
)
5798 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5801 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5804 if (dump_enabled_p ())
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5809 target_support_p
= (optab_handler (optab
, vec_mode
)
5810 != CODE_FOR_nothing
);
/* If the target lacks a vector pattern, we may still proceed using
   word-mode arithmetic, but only if the vector size equals the word
   size and (during analysis) the operation is deemed worthwhile.  */
5813 if (!target_support_p
)
5815 if (dump_enabled_p ())
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5817 "op not supported by target.\n");
5818 /* Check only during analysis. */
5819 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5820 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_NOTE
, vect_location
,
5824 "proceeding using word mode.\n");
5827 /* Worthwhile without SIMD support? Check only during analysis. */
5828 if (!VECTOR_MODE_P (vec_mode
)
5830 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5832 if (dump_enabled_p ())
5833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5834 "not worthwhile without SIMD support.\n");
/* Analysis-only path (VEC_STMT == NULL): record the stmt type and its
   cost model contribution, then return (return lines missing from
   this extraction).  */
5838 if (!vec_stmt
) /* transformation not required. */
5840 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE
, vect_location
,
5843 "=== vectorizable_operation ===\n");
5845 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, NULL
, NULL
);
/* Transformation phase starts here.  */
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_NOTE
, vect_location
,
5853 "transform binary/unary operation.\n");
5856 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5858 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5859 vectors with unsigned elements, but the result is signed. So, we
5860 need to compute the MINUS_EXPR into vectype temporary and
5861 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5862 tree vec_cvt_dest
= NULL_TREE
;
5863 if (orig_code
== POINTER_DIFF_EXPR
)
5864 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5866 /* In case the vectorization factor (VF) is bigger than the number
5867 of elements that we can fit in a vectype (nunits), we have to generate
5868 more than one vector stmt - i.e - we need to "unroll" the
5869 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5870 from one copy of the vector stmt to the next, in the field
5871 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5872 stages to find the correct vector defs to be used when vectorizing
5873 stmts that use the defs of the current stmt. The example below
5874 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5875 we need to create 4 vectorized stmts):
5877 before vectorization:
5878 RELATED_STMT VEC_STMT
5882 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5884 RELATED_STMT VEC_STMT
5885 VS1_0: vx0 = memref0 VS1_1 -
5886 VS1_1: vx1 = memref1 VS1_2 -
5887 VS1_2: vx2 = memref2 VS1_3 -
5888 VS1_3: vx3 = memref3 - -
5889 S1: x = load - VS1_0
5892 step2: vectorize stmt S2 (done here):
5893 To vectorize stmt S2 we first need to find the relevant vector
5894 def for the first operand 'x'. This is, as usual, obtained from
5895 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5896 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5897 relevant vector def 'vx0'. Having found 'vx0' we can generate
5898 the vector stmt VS2_0, and as usual, record it in the
5899 STMT_VINFO_VEC_STMT of stmt S2.
5900 When creating the second copy (VS2_1), we obtain the relevant vector
5901 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5902 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5903 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5904 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5905 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5906 chain of stmts and pointers:
5907 RELATED_STMT VEC_STMT
5908 VS1_0: vx0 = memref0 VS1_1 -
5909 VS1_1: vx1 = memref1 VS1_2 -
5910 VS1_2: vx2 = memref2 VS1_3 -
5911 VS1_3: vx3 = memref3 - -
5912 S1: x = load - VS1_0
5913 VS2_0: vz0 = vx0 + v1 VS2_1 -
5914 VS2_1: vz1 = vx1 + v1 VS2_2 -
5915 VS2_2: vz2 = vx2 + v1 VS2_3 -
5916 VS2_3: vz3 = vx3 + v1 - -
5917 S2: z = x + 1 - VS2_0 */
/* Main loop: one iteration per vector copy.  On the first iteration
   (j == 0) the vector defs are obtained from the operands' defining
   stmts (or from the SLP node); on later iterations they come from
   the previous copy via vect_get_vec_defs_for_stmt_copy.  */
5919 prev_stmt_info
= NULL
;
5920 for (j
= 0; j
< ncopies
; j
++)
5925 if (op_type
== binary_op
)
5926 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5928 else if (op_type
== ternary_op
)
/* SLP ternary case: vect_get_vec_defs only handles two operands,
   so collect all three through vect_get_slp_defs.  */
5932 auto_vec
<tree
> ops(3);
5933 ops
.quick_push (op0
);
5934 ops
.quick_push (op1
);
5935 ops
.quick_push (op2
);
5936 auto_vec
<vec
<tree
> > vec_defs(3);
5937 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
5938 vec_oprnds0
= vec_defs
[0];
5939 vec_oprnds1
= vec_defs
[1];
5940 vec_oprnds2
= vec_defs
[2];
5944 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5946 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
5951 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
/* j > 0: chain from the previous copy's defs.  */
5956 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5957 if (op_type
== ternary_op
)
5959 tree vec_oprnd
= vec_oprnds2
.pop ();
5960 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5965 /* Arguments are ready. Create the new vector stmt. */
5966 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5968 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5969 ? vec_oprnds1
[i
] : NULL_TREE
);
5970 vop2
= ((op_type
== ternary_op
)
5971 ? vec_oprnds2
[i
] : NULL_TREE
);
5972 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5973 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5974 gimple_assign_set_lhs (new_stmt
, new_temp
);
5975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* POINTER_DIFF_EXPR: view-convert the unsigned MINUS result into
   the signed output vectype (guard condition on vec_cvt_dest is
   missing from this extraction -- confirm against upstream).  */
5978 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
5979 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
5981 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
5982 gimple_assign_set_lhs (new_stmt
, new_temp
);
5983 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5986 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Record the first copy in STMT_VINFO_VEC_STMT and chain later
   copies through STMT_VINFO_RELATED_STMT (the j == 0 guard is
   missing from this extraction).  */
5993 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5995 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5996 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the operand vectors gathered above.  */
5999 vec_oprnds0
.release ();
6000 vec_oprnds1
.release ();
6001 vec_oprnds2
.release ();
6006 /* A helper function to ensure data reference DR's base alignment. */
/* NOTE(review): this listing is a garbled extraction -- original line
   numbers are fused into the text and several lines (braces, the
   `else' keyword between the symtab and direct-decl branches) are
   missing.  Restore from the upstream file before compiling; the
   comments below annotate only the visible logic.  */
6009 ensure_base_align (struct data_reference
*dr
)
/* Only act when this data reference was flagged as having a
   misaligned base declaration.  */
6014 if (DR_VECT_AUX (dr
)->base_misaligned
)
6016 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
/* DR_TARGET_ALIGNMENT is in bytes; convert to bits for the
   alignment-setting interfaces below.  */
6018 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
/* Decls visible in the symbol table must be realigned through the
   symtab node; otherwise the alignment is set directly on the decl
   (that branch's `else' fell out of this extraction).  */
6020 if (decl_in_symtab_p (base_decl
))
6021 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6024 SET_DECL_ALIGN (base_decl
, align_base_to
);
/* Mark the new alignment as user-specified (DECL_USER_ALIGN).  */
6025 DECL_USER_ALIGN (base_decl
) = 1;
/* Clear the flag so the fix-up is applied only once per DR.  */
6027 DR_VECT_AUX (dr
)->base_misaligned
= false;
6032 /* Function get_group_alias_ptr_type.
6034 Return the alias type for the group starting at FIRST_STMT. */
/* NOTE(review): garbled extraction -- the loop construct that walks
   the interleaving group via GROUP_NEXT_ELEMENT (its `while'/braces)
   is missing from this listing; confirm against upstream.  Comments
   below annotate only the visible logic.  */
6037 get_group_alias_ptr_type (gimple
*first_stmt
)
6039 struct data_reference
*first_dr
, *next_dr
;
/* Start from the group leader's data reference and its first
   follower in the interleaving chain.  */
6042 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6043 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
/* Walk the group: if any member's alias set differs from the
   leader's, fall back to ptr_type_node as a conservatively safe
   alias type for the whole group.  */
6046 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
6047 if (get_alias_set (DR_REF (first_dr
))
6048 != get_alias_set (DR_REF (next_dr
)))
6050 if (dump_enabled_p ())
6051 dump_printf_loc (MSG_NOTE
, vect_location
,
6052 "conflicting alias set types.\n");
6053 return ptr_type_node
;
/* Advance to the next statement in the group.  */
6055 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* All members agree: use the leader's reference alias type.  */
6057 return reference_alias_ptr_type (DR_REF (first_dr
));
6061 /* Function vectorizable_store.
6063 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
6065 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6066 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6067 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6070 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6075 tree vec_oprnd
= NULL_TREE
;
6076 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6077 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6079 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6080 struct loop
*loop
= NULL
;
6081 machine_mode vec_mode
;
6083 enum dr_alignment_support alignment_support_scheme
;
6085 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6086 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6087 stmt_vec_info prev_stmt_info
= NULL
;
6088 tree dataref_ptr
= NULL_TREE
;
6089 tree dataref_offset
= NULL_TREE
;
6090 gimple
*ptr_incr
= NULL
;
6093 gimple
*next_stmt
, *first_stmt
;
6095 unsigned int group_size
, i
;
6096 vec
<tree
> oprnds
= vNULL
;
6097 vec
<tree
> result_chain
= vNULL
;
6099 tree offset
= NULL_TREE
;
6100 vec
<tree
> vec_oprnds
= vNULL
;
6101 bool slp
= (slp_node
!= NULL
);
6102 unsigned int vec_num
;
6103 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6104 vec_info
*vinfo
= stmt_info
->vinfo
;
6106 gather_scatter_info gs_info
;
6109 vec_load_store_type vls_type
;
6112 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6115 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6119 /* Is vectorizable store? */
6121 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6122 if (is_gimple_assign (stmt
))
6124 tree scalar_dest
= gimple_assign_lhs (stmt
);
6125 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6126 && is_pattern_stmt_p (stmt_info
))
6127 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6128 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6129 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6130 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6131 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6132 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6133 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6134 && TREE_CODE (scalar_dest
) != MEM_REF
)
6139 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
6140 if (!call
|| !gimple_call_internal_p (call
))
6143 internal_fn ifn
= gimple_call_internal_fn (call
);
6144 if (!internal_store_fn_p (ifn
))
6147 if (slp_node
!= NULL
)
6149 if (dump_enabled_p ())
6150 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6151 "SLP of masked stores not supported.\n");
6155 int mask_index
= internal_fn_mask_index (ifn
);
6156 if (mask_index
>= 0)
6158 mask
= gimple_call_arg (call
, mask_index
);
6159 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
6165 op
= vect_get_store_rhs (stmt
);
6167 /* Cannot have hybrid store SLP -- that would mean storing to the
6168 same location twice. */
6169 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6171 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6172 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6176 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6177 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6182 /* Multiple types in SLP are handled by creating the appropriate number of
6183 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6188 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6190 gcc_assert (ncopies
>= 1);
6192 /* FORNOW. This restriction should be relaxed. */
6193 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
6195 if (dump_enabled_p ())
6196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6197 "multiple types in nested loop.\n");
6201 if (!vect_check_store_rhs (stmt
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6204 elem_type
= TREE_TYPE (vectype
);
6205 vec_mode
= TYPE_MODE (vectype
);
6207 if (!STMT_VINFO_DATA_REF (stmt_info
))
6210 vect_memory_access_type memory_access_type
;
6211 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, vls_type
, ncopies
,
6212 &memory_access_type
, &gs_info
))
6217 if (memory_access_type
== VMAT_CONTIGUOUS
)
6219 if (!VECTOR_MODE_P (vec_mode
)
6220 || !can_vec_mask_load_store_p (vec_mode
,
6221 TYPE_MODE (mask_vectype
), false))
6224 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6225 && (memory_access_type
!= VMAT_GATHER_SCATTER
|| gs_info
.decl
))
6227 if (dump_enabled_p ())
6228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6229 "unsupported access type for masked store.\n");
6235 /* FORNOW. In some cases can vectorize even if data-type not supported
6236 (e.g. - array initialization with 0). */
6237 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6241 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6242 && memory_access_type
!= VMAT_GATHER_SCATTER
6243 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6246 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6247 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6248 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6254 group_size
= vec_num
= 1;
6257 if (!vec_stmt
) /* transformation not required. */
6259 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6262 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6263 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6264 memory_access_type
, &gs_info
);
6266 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6267 /* The SLP costs are calculated during SLP analysis. */
6269 vect_model_store_cost (stmt_info
, ncopies
, memory_access_type
,
6270 vls_type
, NULL
, NULL
, NULL
);
6273 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6277 ensure_base_align (dr
);
6279 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6281 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6282 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6283 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6284 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
6285 edge pe
= loop_preheader_edge (loop
);
6288 enum { NARROW
, NONE
, WIDEN
} modifier
;
6289 poly_uint64 scatter_off_nunits
6290 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6292 if (known_eq (nunits
, scatter_off_nunits
))
6294 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6298 /* Currently gathers and scatters are only supported for
6299 fixed-length vectors. */
6300 unsigned int count
= scatter_off_nunits
.to_constant ();
6301 vec_perm_builder
sel (count
, count
, 1);
6302 for (i
= 0; i
< (unsigned int) count
; ++i
)
6303 sel
.quick_push (i
| (count
/ 2));
6305 vec_perm_indices
indices (sel
, 1, count
);
6306 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6308 gcc_assert (perm_mask
!= NULL_TREE
);
6310 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6314 /* Currently gathers and scatters are only supported for
6315 fixed-length vectors. */
6316 unsigned int count
= nunits
.to_constant ();
6317 vec_perm_builder
sel (count
, count
, 1);
6318 for (i
= 0; i
< (unsigned int) count
; ++i
)
6319 sel
.quick_push (i
| (count
/ 2));
6321 vec_perm_indices
indices (sel
, 2, count
);
6322 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6323 gcc_assert (perm_mask
!= NULL_TREE
);
6329 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6330 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6331 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6332 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6333 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6334 scaletype
= TREE_VALUE (arglist
);
6336 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6337 && TREE_CODE (rettype
) == VOID_TYPE
);
6339 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6340 if (!is_gimple_min_invariant (ptr
))
6342 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6343 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6344 gcc_assert (!new_bb
);
6347 /* Currently we support only unconditional scatter stores,
6348 so mask should be all ones. */
6349 mask
= build_int_cst (masktype
, -1);
6350 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6352 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6354 prev_stmt_info
= NULL
;
6355 for (j
= 0; j
< ncopies
; ++j
)
6360 = vect_get_vec_def_for_operand (op
, stmt
);
6362 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6364 else if (modifier
!= NONE
&& (j
& 1))
6366 if (modifier
== WIDEN
)
6369 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6370 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6373 else if (modifier
== NARROW
)
6375 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6378 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6387 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6389 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6393 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6395 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6396 TYPE_VECTOR_SUBPARTS (srctype
)));
6397 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6398 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6399 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6400 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6404 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6406 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6407 TYPE_VECTOR_SUBPARTS (idxtype
)));
6408 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6409 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6410 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6411 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6416 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6418 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6420 if (prev_stmt_info
== NULL
)
6421 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6423 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6424 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6429 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6431 gimple
*group_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6432 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt
))++;
6438 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
6440 /* We vectorize all the stmts of the interleaving group when we
6441 reach the last stmt in the group. */
6442 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
6443 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
6452 grouped_store
= false;
6453 /* VEC_NUM is the number of vect stmts to be created for this
6455 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6456 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6457 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
6458 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6459 op
= vect_get_store_rhs (first_stmt
);
6462 /* VEC_NUM is the number of vect stmts to be created for this
6464 vec_num
= group_size
;
6466 ref_type
= get_group_alias_ptr_type (first_stmt
);
6469 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6471 if (dump_enabled_p ())
6472 dump_printf_loc (MSG_NOTE
, vect_location
,
6473 "transform store. ncopies = %d\n", ncopies
);
6475 if (memory_access_type
== VMAT_ELEMENTWISE
6476 || memory_access_type
== VMAT_STRIDED_SLP
)
6478 gimple_stmt_iterator incr_gsi
;
6484 tree stride_base
, stride_step
, alias_off
;
6487 /* Checked by get_load_store_type. */
6488 unsigned int const_nunits
= nunits
.to_constant ();
6490 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6491 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6494 = fold_build_pointer_plus
6495 (DR_BASE_ADDRESS (first_dr
),
6496 size_binop (PLUS_EXPR
,
6497 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6498 convert_to_ptrofftype (DR_INIT (first_dr
))));
6499 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6501 /* For a store with loop-invariant (but other than power-of-2)
6502 stride (i.e. not a grouped access) like so:
6504 for (i = 0; i < n; i += stride)
6507 we generate a new induction variable and new stores from
6508 the components of the (vectorized) rhs:
6510 for (j = 0; ; j += VF*stride)
6515 array[j + stride] = tmp2;
6519 unsigned nstores
= const_nunits
;
6521 tree ltype
= elem_type
;
6522 tree lvectype
= vectype
;
6525 if (group_size
< const_nunits
6526 && const_nunits
% group_size
== 0)
6528 nstores
= const_nunits
/ group_size
;
6530 ltype
= build_vector_type (elem_type
, group_size
);
6533 /* First check if vec_extract optab doesn't support extraction
6534 of vector elts directly. */
6535 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6537 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6538 || !VECTOR_MODE_P (vmode
)
6539 || !targetm
.vector_mode_supported_p (vmode
)
6540 || (convert_optab_handler (vec_extract_optab
,
6541 TYPE_MODE (vectype
), vmode
)
6542 == CODE_FOR_nothing
))
6544 /* Try to avoid emitting an extract of vector elements
6545 by performing the extracts using an integer type of the
6546 same size, extracting from a vector of those and then
6547 re-interpreting it as the original vector type if
6550 = group_size
* GET_MODE_BITSIZE (elmode
);
6551 elmode
= int_mode_for_size (lsize
, 0).require ();
6552 unsigned int lnunits
= const_nunits
/ group_size
;
6553 /* If we can't construct such a vector fall back to
6554 element extracts from the original vector type and
6555 element size stores. */
6556 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6557 && VECTOR_MODE_P (vmode
)
6558 && targetm
.vector_mode_supported_p (vmode
)
6559 && (convert_optab_handler (vec_extract_optab
,
6561 != CODE_FOR_nothing
))
6565 ltype
= build_nonstandard_integer_type (lsize
, 1);
6566 lvectype
= build_vector_type (ltype
, nstores
);
6568 /* Else fall back to vector extraction anyway.
6569 Fewer stores are more important than avoiding spilling
6570 of the vector we extract from. Compared to the
6571 construction case in vectorizable_load no store-forwarding
6572 issue exists here for reasonable archs. */
6575 else if (group_size
>= const_nunits
6576 && group_size
% const_nunits
== 0)
6579 lnel
= const_nunits
;
6583 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6584 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6587 ivstep
= stride_step
;
6588 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6589 build_int_cst (TREE_TYPE (ivstep
), vf
));
6591 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6593 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6594 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6595 create_iv (stride_base
, ivstep
, NULL
,
6596 loop
, &incr_gsi
, insert_after
,
6598 incr
= gsi_stmt (incr_gsi
);
6599 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6601 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6603 prev_stmt_info
= NULL
;
6604 alias_off
= build_int_cst (ref_type
, 0);
6605 next_stmt
= first_stmt
;
6606 for (g
= 0; g
< group_size
; g
++)
6608 running_off
= offvar
;
6611 tree size
= TYPE_SIZE_UNIT (ltype
);
6612 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6614 tree newoff
= copy_ssa_name (running_off
, NULL
);
6615 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6617 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6618 running_off
= newoff
;
6620 unsigned int group_el
= 0;
6621 unsigned HOST_WIDE_INT
6622 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6623 for (j
= 0; j
< ncopies
; j
++)
6625 /* We've set op and dt above, from vect_get_store_rhs,
6626 and first_stmt == stmt. */
6631 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6633 vec_oprnd
= vec_oprnds
[0];
6637 op
= vect_get_store_rhs (next_stmt
);
6638 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6644 vec_oprnd
= vec_oprnds
[j
];
6647 vect_is_simple_use (op
, vinfo
, &def_stmt
, &rhs_dt
);
6648 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
,
6652 /* Pun the vector to extract from if necessary. */
6653 if (lvectype
!= vectype
)
6655 tree tem
= make_ssa_name (lvectype
);
6657 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6658 lvectype
, vec_oprnd
));
6659 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6662 for (i
= 0; i
< nstores
; i
++)
6664 tree newref
, newoff
;
6665 gimple
*incr
, *assign
;
6666 tree size
= TYPE_SIZE (ltype
);
6667 /* Extract the i'th component. */
6668 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6669 bitsize_int (i
), size
);
6670 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6673 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6677 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6679 newref
= build2 (MEM_REF
, ltype
,
6680 running_off
, this_off
);
6681 vect_copy_ref_info (newref
, DR_REF (first_dr
));
6683 /* And store it to *running_off. */
6684 assign
= gimple_build_assign (newref
, elem
);
6685 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6689 || group_el
== group_size
)
6691 newoff
= copy_ssa_name (running_off
, NULL
);
6692 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6693 running_off
, stride_step
);
6694 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6696 running_off
= newoff
;
6699 if (g
== group_size
- 1
6702 if (j
== 0 && i
== 0)
6703 STMT_VINFO_VEC_STMT (stmt_info
)
6704 = *vec_stmt
= assign
;
6706 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6707 prev_stmt_info
= vinfo_for_stmt (assign
);
6711 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6716 vec_oprnds
.release ();
6720 auto_vec
<tree
> dr_chain (group_size
);
6721 oprnds
.create (group_size
);
6723 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6724 gcc_assert (alignment_support_scheme
);
6725 vec_loop_masks
*loop_masks
6726 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6727 ? &LOOP_VINFO_MASKS (loop_vinfo
)
6729 /* Targets with store-lane instructions must not require explicit
6730 realignment. vect_supportable_dr_alignment always returns either
6731 dr_aligned or dr_unaligned_supported for masked operations. */
6732 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6735 || alignment_support_scheme
== dr_aligned
6736 || alignment_support_scheme
== dr_unaligned_supported
);
6738 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6739 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6740 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6743 tree vec_offset
= NULL_TREE
;
6744 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6746 aggr_type
= NULL_TREE
;
6749 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
6751 aggr_type
= elem_type
;
6752 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
6753 &bump
, &vec_offset
);
6757 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6758 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6760 aggr_type
= vectype
;
6761 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
6765 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6767 /* In case the vectorization factor (VF) is bigger than the number
6768 of elements that we can fit in a vectype (nunits), we have to generate
6769 more than one vector stmt - i.e - we need to "unroll" the
6770 vector stmt by a factor VF/nunits. For more details see documentation in
6771 vect_get_vec_def_for_copy_stmt. */
6773 /* In case of interleaving (non-unit grouped access):
6780 We create vectorized stores starting from base address (the access of the
6781 first stmt in the chain (S2 in the above example), when the last store stmt
6782 of the chain (S4) is reached:
6785 VS2: &base + vec_size*1 = vx0
6786 VS3: &base + vec_size*2 = vx1
6787 VS4: &base + vec_size*3 = vx3
6789 Then permutation statements are generated:
6791 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6792 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6795 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6796 (the order of the data-refs in the output of vect_permute_store_chain
6797 corresponds to the order of scalar stmts in the interleaving chain - see
6798 the documentation of vect_permute_store_chain()).
6800 In case of both multiple types and interleaving, above vector stores and
6801 permutation stmts are created for every copy. The result vector stmts are
6802 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6803 STMT_VINFO_RELATED_STMT for the next copies.
6806 prev_stmt_info
= NULL
;
6807 tree vec_mask
= NULL_TREE
;
6808 for (j
= 0; j
< ncopies
; j
++)
6815 /* Get vectorized arguments for SLP_NODE. */
6816 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6819 vec_oprnd
= vec_oprnds
[0];
6823 /* For interleaved stores we collect vectorized defs for all the
6824 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6825 used as an input to vect_permute_store_chain(), and OPRNDS as
6826 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6828 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6829 OPRNDS are of size 1. */
6830 next_stmt
= first_stmt
;
6831 for (i
= 0; i
< group_size
; i
++)
6833 /* Since gaps are not supported for interleaved stores,
6834 GROUP_SIZE is the exact number of stmts in the chain.
6835 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6836 there is no interleaving, GROUP_SIZE is 1, and only one
6837 iteration of the loop will be executed. */
6838 op
= vect_get_store_rhs (next_stmt
);
6839 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6840 dr_chain
.quick_push (vec_oprnd
);
6841 oprnds
.quick_push (vec_oprnd
);
6842 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6845 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
6849 /* We should have caught mismatched types earlier. */
6850 gcc_assert (useless_type_conversion_p (vectype
,
6851 TREE_TYPE (vec_oprnd
)));
6852 bool simd_lane_access_p
6853 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6854 if (simd_lane_access_p
6855 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6856 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6857 && integer_zerop (DR_OFFSET (first_dr
))
6858 && integer_zerop (DR_INIT (first_dr
))
6859 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6860 get_alias_set (TREE_TYPE (ref_type
))))
6862 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6863 dataref_offset
= build_int_cst (ref_type
, 0);
6866 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6868 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
6869 &dataref_ptr
, &vec_offset
);
6874 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6875 simd_lane_access_p
? loop
: NULL
,
6876 offset
, &dummy
, gsi
, &ptr_incr
,
6877 simd_lane_access_p
, &inv_p
,
6879 gcc_assert (bb_vinfo
|| !inv_p
);
6883 /* For interleaved stores we created vectorized defs for all the
6884 defs stored in OPRNDS in the previous iteration (previous copy).
6885 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6886 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6888 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6889 OPRNDS are of size 1. */
6890 for (i
= 0; i
< group_size
; i
++)
6893 vect_is_simple_use (op
, vinfo
, &def_stmt
, &rhs_dt
);
6894 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
, op
);
6895 dr_chain
[i
] = vec_oprnd
;
6896 oprnds
[i
] = vec_oprnd
;
6899 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
6902 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6903 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6904 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6907 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6911 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6915 /* Get an array into which we can store the individual vectors. */
6916 vec_array
= create_vector_array (vectype
, vec_num
);
6918 /* Invalidate the current contents of VEC_ARRAY. This should
6919 become an RTL clobber too, which prevents the vector registers
6920 from being upward-exposed. */
6921 vect_clobber_variable (stmt
, gsi
, vec_array
);
6923 /* Store the individual vectors into the array. */
6924 for (i
= 0; i
< vec_num
; i
++)
6926 vec_oprnd
= dr_chain
[i
];
6927 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
6930 tree final_mask
= NULL
;
6932 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
6935 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6942 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6944 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
6945 tree alias_ptr
= build_int_cst (ref_type
, align
);
6946 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
6947 dataref_ptr
, alias_ptr
,
6948 final_mask
, vec_array
);
6953 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6954 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
6955 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
6957 gimple_call_set_lhs (call
, data_ref
);
6959 gimple_call_set_nothrow (call
, true);
6961 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6963 /* Record that VEC_ARRAY is now dead. */
6964 vect_clobber_variable (stmt
, gsi
, vec_array
);
6972 result_chain
.create (group_size
);
6974 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
6978 next_stmt
= first_stmt
;
6979 for (i
= 0; i
< vec_num
; i
++)
6981 unsigned align
, misalign
;
6983 tree final_mask
= NULL_TREE
;
6985 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
6987 vectype
, vec_num
* j
+ i
);
6989 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
6992 if (memory_access_type
== VMAT_GATHER_SCATTER
)
6994 tree scale
= size_int (gs_info
.scale
);
6997 call
= gimple_build_call_internal
6998 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
6999 scale
, vec_oprnd
, final_mask
);
7001 call
= gimple_build_call_internal
7002 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7004 gimple_call_set_nothrow (call
, true);
7006 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7011 /* Bump the vector pointer. */
7012 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7016 vec_oprnd
= vec_oprnds
[i
];
7017 else if (grouped_store
)
7018 /* For grouped stores vectorized defs are interleaved in
7019 vect_permute_store_chain(). */
7020 vec_oprnd
= result_chain
[i
];
7022 align
= DR_TARGET_ALIGNMENT (first_dr
);
7023 if (aligned_access_p (first_dr
))
7025 else if (DR_MISALIGNMENT (first_dr
) == -1)
7027 align
= dr_alignment (vect_dr_behavior (first_dr
));
7031 misalign
= DR_MISALIGNMENT (first_dr
);
7032 if (dataref_offset
== NULL_TREE
7033 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7034 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7037 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7039 tree perm_mask
= perm_mask_for_reverse (vectype
);
7041 = vect_create_destination_var (vect_get_store_rhs (stmt
),
7043 tree new_temp
= make_ssa_name (perm_dest
);
7045 /* Generate the permute statement. */
7047 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7048 vec_oprnd
, perm_mask
);
7049 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
7051 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7052 vec_oprnd
= new_temp
;
7055 /* Arguments are ready. Create the new vector stmt. */
7058 align
= least_bit_hwi (misalign
| align
);
7059 tree ptr
= build_int_cst (ref_type
, align
);
7061 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7063 final_mask
, vec_oprnd
);
7064 gimple_call_set_nothrow (call
, true);
7069 data_ref
= fold_build2 (MEM_REF
, vectype
,
7073 : build_int_cst (ref_type
, 0));
7074 if (aligned_access_p (first_dr
))
7076 else if (DR_MISALIGNMENT (first_dr
) == -1)
7077 TREE_TYPE (data_ref
)
7078 = build_aligned_type (TREE_TYPE (data_ref
),
7079 align
* BITS_PER_UNIT
);
7081 TREE_TYPE (data_ref
)
7082 = build_aligned_type (TREE_TYPE (data_ref
),
7083 TYPE_ALIGN (elem_type
));
7084 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7085 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
7087 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7092 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
7100 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7102 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7103 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7108 result_chain
.release ();
7109 vec_oprnds
.release ();
7114 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7115 VECTOR_CST mask. No checks are made that the target platform supports the
7116 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7117 vect_gen_perm_mask_checked. */
7120 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
7124 poly_uint64 nunits
= sel
.length ();
7125 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
7127 mask_type
= build_vector_type (ssizetype
, nunits
);
7128 return vec_perm_indices_to_tree (mask_type
, sel
);
7131 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7132 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7135 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
7137 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
7138 return vect_gen_perm_mask_any (vectype
, sel
);
7141 /* Given a vector variable X and Y, that was generated for the scalar
7142 STMT, generate instructions to permute the vector elements of X and Y
7143 using permutation mask MASK_VEC, insert them at *GSI and return the
7144 permuted vector variable. */
7147 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
7148 gimple_stmt_iterator
*gsi
)
7150 tree vectype
= TREE_TYPE (x
);
7151 tree perm_dest
, data_ref
;
7154 tree scalar_dest
= gimple_get_lhs (stmt
);
7155 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
7156 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7158 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
7159 data_ref
= make_ssa_name (perm_dest
);
7161 /* Generate the permute statement. */
7162 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
7163 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
7168 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7169 inserting them on the loops preheader edge. Returns true if we
7170 were successful in doing so (and thus STMT can be moved then),
7171 otherwise returns false. */
7174 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
7180 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
7182 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7183 if (!gimple_nop_p (def_stmt
)
7184 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7186 /* Make sure we don't need to recurse. While we could do
7187 so in simple cases when there are more complex use webs
7188 we don't have an easy way to preserve stmt order to fulfil
7189 dependencies within them. */
7192 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
7194 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
7196 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
7197 if (!gimple_nop_p (def_stmt2
)
7198 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
7208 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
7210 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7211 if (!gimple_nop_p (def_stmt
)
7212 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7214 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7215 gsi_remove (&gsi
, false);
7216 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
7223 /* vectorizable_load.
7225 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
7227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7228 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7232 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
7233 slp_tree slp_node
, slp_instance slp_node_instance
)
7236 tree vec_dest
= NULL
;
7237 tree data_ref
= NULL
;
7238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7239 stmt_vec_info prev_stmt_info
;
7240 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7241 struct loop
*loop
= NULL
;
7242 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
7243 bool nested_in_vect_loop
= false;
7244 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
7248 gimple
*new_stmt
= NULL
;
7250 enum dr_alignment_support alignment_support_scheme
;
7251 tree dataref_ptr
= NULL_TREE
;
7252 tree dataref_offset
= NULL_TREE
;
7253 gimple
*ptr_incr
= NULL
;
7256 unsigned int group_size
;
7257 poly_uint64 group_gap_adj
;
7258 tree msq
= NULL_TREE
, lsq
;
7259 tree offset
= NULL_TREE
;
7260 tree byte_offset
= NULL_TREE
;
7261 tree realignment_token
= NULL_TREE
;
7263 vec
<tree
> dr_chain
= vNULL
;
7264 bool grouped_load
= false;
7266 gimple
*first_stmt_for_drptr
= NULL
;
7268 bool compute_in_loop
= false;
7269 struct loop
*at_loop
;
7271 bool slp
= (slp_node
!= NULL
);
7272 bool slp_perm
= false;
7273 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7276 gather_scatter_info gs_info
;
7277 vec_info
*vinfo
= stmt_info
->vinfo
;
7279 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7281 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7284 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7288 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7289 if (is_gimple_assign (stmt
))
7291 scalar_dest
= gimple_assign_lhs (stmt
);
7292 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7295 tree_code code
= gimple_assign_rhs_code (stmt
);
7296 if (code
!= ARRAY_REF
7297 && code
!= BIT_FIELD_REF
7298 && code
!= INDIRECT_REF
7299 && code
!= COMPONENT_REF
7300 && code
!= IMAGPART_EXPR
7301 && code
!= REALPART_EXPR
7303 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7308 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
7309 if (!call
|| !gimple_call_internal_p (call
))
7312 internal_fn ifn
= gimple_call_internal_fn (call
);
7313 if (!internal_load_fn_p (ifn
))
7316 scalar_dest
= gimple_call_lhs (call
);
7320 if (slp_node
!= NULL
)
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7324 "SLP of masked loads not supported.\n");
7328 int mask_index
= internal_fn_mask_index (ifn
);
7329 if (mask_index
>= 0)
7331 mask
= gimple_call_arg (call
, mask_index
);
7332 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
7338 if (!STMT_VINFO_DATA_REF (stmt_info
))
7341 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7342 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7346 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7347 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
7348 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7353 /* Multiple types in SLP are handled by creating the appropriate number of
7354 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7359 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7361 gcc_assert (ncopies
>= 1);
7363 /* FORNOW. This restriction should be relaxed. */
7364 if (nested_in_vect_loop
&& ncopies
> 1)
7366 if (dump_enabled_p ())
7367 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7368 "multiple types in nested loop.\n");
7372 /* Invalidate assumptions made by dependence analysis when vectorization
7373 on the unrolled body effectively re-orders stmts. */
7375 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7376 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7377 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7379 if (dump_enabled_p ())
7380 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7381 "cannot perform implicit CSE when unrolling "
7382 "with negative dependence distance\n");
7386 elem_type
= TREE_TYPE (vectype
);
7387 mode
= TYPE_MODE (vectype
);
7389 /* FORNOW. In some cases can vectorize even if data-type not supported
7390 (e.g. - data copies). */
7391 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7393 if (dump_enabled_p ())
7394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7395 "Aligned load, but unsupported type.\n");
7399 /* Check if the load is a part of an interleaving chain. */
7400 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7402 grouped_load
= true;
7404 gcc_assert (!nested_in_vect_loop
);
7405 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7407 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7408 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7410 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7413 /* Invalidate assumptions made by dependence analysis when vectorization
7414 on the unrolled body effectively re-orders stmts. */
7415 if (!PURE_SLP_STMT (stmt_info
)
7416 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7417 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7418 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7420 if (dump_enabled_p ())
7421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7422 "cannot perform implicit CSE when performing "
7423 "group loads with negative dependence distance\n");
7427 /* Similarly when the stmt is a load that is both part of a SLP
7428 instance and a loop vectorized stmt via the same-dr mechanism
7429 we have to give up. */
7430 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
7431 && (STMT_SLP_TYPE (stmt_info
)
7432 != STMT_SLP_TYPE (vinfo_for_stmt
7433 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
7435 if (dump_enabled_p ())
7436 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7437 "conflicting SLP types for CSEd load\n");
7444 vect_memory_access_type memory_access_type
;
7445 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7446 &memory_access_type
, &gs_info
))
7451 if (memory_access_type
== VMAT_CONTIGUOUS
)
7453 machine_mode vec_mode
= TYPE_MODE (vectype
);
7454 if (!VECTOR_MODE_P (vec_mode
)
7455 || !can_vec_mask_load_store_p (vec_mode
,
7456 TYPE_MODE (mask_vectype
), true))
7459 else if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7461 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7463 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7464 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7466 if (dump_enabled_p ())
7467 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7468 "masked gather with integer mask not"
7473 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7474 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7478 "unsupported access type for masked load.\n");
7483 if (!vec_stmt
) /* transformation not required. */
7486 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7489 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7490 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7491 memory_access_type
, &gs_info
);
7493 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7494 /* The SLP costs are calculated during SLP analysis. */
7496 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7502 gcc_assert (memory_access_type
7503 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_NOTE
, vect_location
,
7507 "transform load. ncopies = %d\n", ncopies
);
7511 ensure_base_align (dr
);
7513 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7515 vect_build_gather_load_calls (stmt
, gsi
, vec_stmt
, &gs_info
, mask
,
7520 if (memory_access_type
== VMAT_ELEMENTWISE
7521 || memory_access_type
== VMAT_STRIDED_SLP
)
7523 gimple_stmt_iterator incr_gsi
;
7529 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7530 tree stride_base
, stride_step
, alias_off
;
7531 /* Checked by get_load_store_type. */
7532 unsigned int const_nunits
= nunits
.to_constant ();
7533 unsigned HOST_WIDE_INT cst_offset
= 0;
7535 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7536 gcc_assert (!nested_in_vect_loop
);
7540 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7541 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7548 if (slp
&& grouped_load
)
7550 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7551 ref_type
= get_group_alias_ptr_type (first_stmt
);
7557 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7558 * vect_get_place_in_interleaving_chain (stmt
, first_stmt
));
7560 ref_type
= reference_alias_ptr_type (DR_REF (dr
));
7564 = fold_build_pointer_plus
7565 (DR_BASE_ADDRESS (first_dr
),
7566 size_binop (PLUS_EXPR
,
7567 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7568 convert_to_ptrofftype (DR_INIT (first_dr
))));
7569 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7571 /* For a load with loop-invariant (but other than power-of-2)
7572 stride (i.e. not a grouped access) like so:
7574 for (i = 0; i < n; i += stride)
7577 we generate a new induction variable and new accesses to
7578 form a new vector (or vectors, depending on ncopies):
7580 for (j = 0; ; j += VF*stride)
7582 tmp2 = array[j + stride];
7584 vectemp = {tmp1, tmp2, ...}
7587 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7588 build_int_cst (TREE_TYPE (stride_step
), vf
));
7590 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7592 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7593 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7594 create_iv (stride_base
, ivstep
, NULL
,
7595 loop
, &incr_gsi
, insert_after
,
7597 incr
= gsi_stmt (incr_gsi
);
7598 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7600 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7602 prev_stmt_info
= NULL
;
7603 running_off
= offvar
;
7604 alias_off
= build_int_cst (ref_type
, 0);
7605 int nloads
= const_nunits
;
7607 tree ltype
= TREE_TYPE (vectype
);
7608 tree lvectype
= vectype
;
7609 auto_vec
<tree
> dr_chain
;
7610 if (memory_access_type
== VMAT_STRIDED_SLP
)
7612 if (group_size
< const_nunits
)
7614 /* First check if vec_init optab supports construction from
7615 vector elts directly. */
7616 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7618 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7619 && VECTOR_MODE_P (vmode
)
7620 && targetm
.vector_mode_supported_p (vmode
)
7621 && (convert_optab_handler (vec_init_optab
,
7622 TYPE_MODE (vectype
), vmode
)
7623 != CODE_FOR_nothing
))
7625 nloads
= const_nunits
/ group_size
;
7627 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7631 /* Otherwise avoid emitting a constructor of vector elements
7632 by performing the loads using an integer type of the same
7633 size, constructing a vector of those and then
7634 re-interpreting it as the original vector type.
7635 This avoids a huge runtime penalty due to the general
7636 inability to perform store forwarding from smaller stores
7637 to a larger load. */
7639 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7640 elmode
= int_mode_for_size (lsize
, 0).require ();
7641 unsigned int lnunits
= const_nunits
/ group_size
;
7642 /* If we can't construct such a vector fall back to
7643 element loads of the original vector type. */
7644 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7645 && VECTOR_MODE_P (vmode
)
7646 && targetm
.vector_mode_supported_p (vmode
)
7647 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7648 != CODE_FOR_nothing
))
7652 ltype
= build_nonstandard_integer_type (lsize
, 1);
7653 lvectype
= build_vector_type (ltype
, nloads
);
7660 lnel
= const_nunits
;
7663 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7667 /* For SLP permutation support we need to load the whole group,
7668 not only the number of vector stmts the permutation result
7672 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7674 unsigned int const_vf
= vf
.to_constant ();
7675 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7676 dr_chain
.create (ncopies
);
7679 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7681 unsigned int group_el
= 0;
7682 unsigned HOST_WIDE_INT
7683 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7684 for (j
= 0; j
< ncopies
; j
++)
7687 vec_alloc (v
, nloads
);
7688 for (i
= 0; i
< nloads
; i
++)
7690 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7691 group_el
* elsz
+ cst_offset
);
7692 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
7693 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7694 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
7695 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7697 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7698 gimple_assign_lhs (new_stmt
));
7702 || group_el
== group_size
)
7704 tree newoff
= copy_ssa_name (running_off
);
7705 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7706 running_off
, stride_step
);
7707 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7709 running_off
= newoff
;
7715 tree vec_inv
= build_constructor (lvectype
, v
);
7716 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7717 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7718 if (lvectype
!= vectype
)
7720 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7722 build1 (VIEW_CONVERT_EXPR
,
7723 vectype
, new_temp
));
7724 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7731 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7733 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7738 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7740 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7741 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7747 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7748 slp_node_instance
, false, &n_perms
);
7753 if (memory_access_type
== VMAT_GATHER_SCATTER
7754 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
7755 grouped_load
= false;
7759 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
7760 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7761 /* For SLP vectorization we directly vectorize a subchain
7762 without permutation. */
7763 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7764 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7765 /* For BB vectorization always use the first stmt to base
7766 the data ref pointer on. */
7768 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7770 /* Check if the chain of loads is already vectorized. */
7771 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7772 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7773 ??? But we can only do so if there is exactly one
7774 as we have no way to get at the rest. Leave the CSE
7776 ??? With the group load eventually participating
7777 in multiple different permutations (having multiple
7778 slp nodes which refer to the same group) the CSE
7779 is even wrong code. See PR56270. */
7782 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7785 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7788 /* VEC_NUM is the number of vect stmts to be created for this group. */
7791 grouped_load
= false;
7792 /* For SLP permutation support we need to load the whole group,
7793 not only the number of vector stmts the permutation result
7797 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7799 unsigned int const_vf
= vf
.to_constant ();
7800 unsigned int const_nunits
= nunits
.to_constant ();
7801 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7802 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7806 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7808 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7812 vec_num
= group_size
;
7814 ref_type
= get_group_alias_ptr_type (first_stmt
);
7820 group_size
= vec_num
= 1;
7822 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7825 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7826 gcc_assert (alignment_support_scheme
);
7827 vec_loop_masks
*loop_masks
7828 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7829 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7831 /* Targets with store-lane instructions must not require explicit
7832 realignment. vect_supportable_dr_alignment always returns either
7833 dr_aligned or dr_unaligned_supported for masked operations. */
7834 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7837 || alignment_support_scheme
== dr_aligned
7838 || alignment_support_scheme
== dr_unaligned_supported
);
7840 /* In case the vectorization factor (VF) is bigger than the number
7841 of elements that we can fit in a vectype (nunits), we have to generate
7842 more than one vector stmt - i.e - we need to "unroll" the
7843 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7844 from one copy of the vector stmt to the next, in the field
7845 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7846 stages to find the correct vector defs to be used when vectorizing
7847 stmts that use the defs of the current stmt. The example below
7848 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7849 need to create 4 vectorized stmts):
7851 before vectorization:
7852 RELATED_STMT VEC_STMT
7856 step 1: vectorize stmt S1:
7857 We first create the vector stmt VS1_0, and, as usual, record a
7858 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7859 Next, we create the vector stmt VS1_1, and record a pointer to
7860 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7861 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7863 RELATED_STMT VEC_STMT
7864 VS1_0: vx0 = memref0 VS1_1 -
7865 VS1_1: vx1 = memref1 VS1_2 -
7866 VS1_2: vx2 = memref2 VS1_3 -
7867 VS1_3: vx3 = memref3 - -
7868 S1: x = load - VS1_0
7871 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7872 information we recorded in RELATED_STMT field is used to vectorize
7875 /* In case of interleaving (non-unit grouped access):
7882 Vectorized loads are created in the order of memory accesses
7883 starting from the access of the first stmt of the chain:
7886 VS2: vx1 = &base + vec_size*1
7887 VS3: vx3 = &base + vec_size*2
7888 VS4: vx4 = &base + vec_size*3
7890 Then permutation statements are generated:
7892 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7893 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7896 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7897 (the order of the data-refs in the output of vect_permute_load_chain
7898 corresponds to the order of scalar stmts in the interleaving chain - see
7899 the documentation of vect_permute_load_chain()).
7900 The generation of permutation stmts and recording them in
7901 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7903 In case of both multiple types and interleaving, the vector loads and
7904 permutation stmts above are created for every copy. The result vector
7905 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7906 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7908 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7909 on a target that supports unaligned accesses (dr_unaligned_supported)
7910 we generate the following code:
7914 p = p + indx * vectype_size;
7919 Otherwise, the data reference is potentially unaligned on a target that
7920 does not support unaligned accesses (dr_explicit_realign_optimized) -
7921 then generate the following code, in which the data in each iteration is
7922 obtained by two vector loads, one from the previous iteration, and one
7923 from the current iteration:
7925 msq_init = *(floor(p1))
7926 p2 = initial_addr + VS - 1;
7927 realignment_token = call target_builtin;
7930 p2 = p2 + indx * vectype_size
7932 vec_dest = realign_load (msq, lsq, realignment_token)
7937 /* If the misalignment remains the same throughout the execution of the
7938 loop, we can create the init_addr and permutation mask at the loop
7939 preheader. Otherwise, it needs to be created inside the loop.
7940 This can only occur when vectorizing memory accesses in the inner-loop
7941 nested within an outer-loop that is being vectorized. */
7943 if (nested_in_vect_loop
7944 && !multiple_p (DR_STEP_ALIGNMENT (dr
),
7945 GET_MODE_SIZE (TYPE_MODE (vectype
))))
7947 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7948 compute_in_loop
= true;
7951 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7952 || alignment_support_scheme
== dr_explicit_realign
)
7953 && !compute_in_loop
)
7955 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7956 alignment_support_scheme
, NULL_TREE
,
7958 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7960 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7961 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7968 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7969 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7972 tree vec_offset
= NULL_TREE
;
7973 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7975 aggr_type
= NULL_TREE
;
7978 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7980 aggr_type
= elem_type
;
7981 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
7982 &bump
, &vec_offset
);
7986 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7987 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7989 aggr_type
= vectype
;
7990 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
7993 tree vec_mask
= NULL_TREE
;
7994 prev_stmt_info
= NULL
;
7995 poly_uint64 group_elt
= 0;
7996 for (j
= 0; j
< ncopies
; j
++)
7998 /* 1. Create the vector or array pointer update chain. */
8001 bool simd_lane_access_p
8002 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8003 if (simd_lane_access_p
8004 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
8005 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
8006 && integer_zerop (DR_OFFSET (first_dr
))
8007 && integer_zerop (DR_INIT (first_dr
))
8008 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8009 get_alias_set (TREE_TYPE (ref_type
)))
8010 && (alignment_support_scheme
== dr_aligned
8011 || alignment_support_scheme
== dr_unaligned_supported
))
8013 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
8014 dataref_offset
= build_int_cst (ref_type
, 0);
8017 else if (first_stmt_for_drptr
8018 && first_stmt
!= first_stmt_for_drptr
)
8021 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
8022 at_loop
, offset
, &dummy
, gsi
,
8023 &ptr_incr
, simd_lane_access_p
,
8024 &inv_p
, byte_offset
, bump
);
8025 /* Adjust the pointer by the difference to first_stmt. */
8026 data_reference_p ptrdr
8027 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
8028 tree diff
= fold_convert (sizetype
,
8029 size_binop (MINUS_EXPR
,
8032 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8035 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8037 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
8038 &dataref_ptr
, &vec_offset
);
8043 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
8044 offset
, &dummy
, gsi
, &ptr_incr
,
8045 simd_lane_access_p
, &inv_p
,
8048 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
8054 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8056 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8057 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
8060 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8063 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
8066 if (grouped_load
|| slp_perm
)
8067 dr_chain
.create (vec_num
);
8069 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8073 vec_array
= create_vector_array (vectype
, vec_num
);
8075 tree final_mask
= NULL_TREE
;
8077 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8080 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8087 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8089 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8090 tree alias_ptr
= build_int_cst (ref_type
, align
);
8091 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8092 dataref_ptr
, alias_ptr
,
8098 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8099 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8100 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8102 gimple_call_set_lhs (call
, vec_array
);
8103 gimple_call_set_nothrow (call
, true);
8105 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8107 /* Extract each vector into an SSA_NAME. */
8108 for (i
= 0; i
< vec_num
; i
++)
8110 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
8112 dr_chain
.quick_push (new_temp
);
8115 /* Record the mapping between SSA_NAMEs and statements. */
8116 vect_record_grouped_load_vectors (stmt
, dr_chain
);
8118 /* Record that VEC_ARRAY is now dead. */
8119 vect_clobber_variable (stmt
, gsi
, vec_array
);
8123 for (i
= 0; i
< vec_num
; i
++)
8125 tree final_mask
= NULL_TREE
;
8127 && memory_access_type
!= VMAT_INVARIANT
)
8128 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8130 vectype
, vec_num
* j
+ i
);
8132 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8136 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8139 /* 2. Create the vector-load in the loop. */
8140 switch (alignment_support_scheme
)
8143 case dr_unaligned_supported
:
8145 unsigned int align
, misalign
;
8147 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8149 tree scale
= size_int (gs_info
.scale
);
8152 call
= gimple_build_call_internal
8153 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8154 vec_offset
, scale
, final_mask
);
8156 call
= gimple_build_call_internal
8157 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8159 gimple_call_set_nothrow (call
, true);
8161 data_ref
= NULL_TREE
;
8165 align
= DR_TARGET_ALIGNMENT (dr
);
8166 if (alignment_support_scheme
== dr_aligned
)
8168 gcc_assert (aligned_access_p (first_dr
));
8171 else if (DR_MISALIGNMENT (first_dr
) == -1)
8173 align
= dr_alignment (vect_dr_behavior (first_dr
));
8177 misalign
= DR_MISALIGNMENT (first_dr
);
8178 if (dataref_offset
== NULL_TREE
8179 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8180 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8185 align
= least_bit_hwi (misalign
| align
);
8186 tree ptr
= build_int_cst (ref_type
, align
);
8188 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8191 gimple_call_set_nothrow (call
, true);
8193 data_ref
= NULL_TREE
;
8198 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8201 : build_int_cst (ref_type
, 0));
8202 if (alignment_support_scheme
== dr_aligned
)
8204 else if (DR_MISALIGNMENT (first_dr
) == -1)
8205 TREE_TYPE (data_ref
)
8206 = build_aligned_type (TREE_TYPE (data_ref
),
8207 align
* BITS_PER_UNIT
);
8209 TREE_TYPE (data_ref
)
8210 = build_aligned_type (TREE_TYPE (data_ref
),
8211 TYPE_ALIGN (elem_type
));
8215 case dr_explicit_realign
:
8219 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8221 if (compute_in_loop
)
8222 msq
= vect_setup_realignment (first_stmt
, gsi
,
8224 dr_explicit_realign
,
8227 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8228 ptr
= copy_ssa_name (dataref_ptr
);
8230 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8231 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8232 new_stmt
= gimple_build_assign
8233 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8235 (TREE_TYPE (dataref_ptr
),
8236 -(HOST_WIDE_INT
) align
));
8237 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8239 = build2 (MEM_REF
, vectype
, ptr
,
8240 build_int_cst (ref_type
, 0));
8241 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8242 vec_dest
= vect_create_destination_var (scalar_dest
,
8244 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8245 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8246 gimple_assign_set_lhs (new_stmt
, new_temp
);
8247 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
8248 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
8249 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8252 bump
= size_binop (MULT_EXPR
, vs
,
8253 TYPE_SIZE_UNIT (elem_type
));
8254 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8255 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
8256 new_stmt
= gimple_build_assign
8257 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8259 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8260 ptr
= copy_ssa_name (ptr
, new_stmt
);
8261 gimple_assign_set_lhs (new_stmt
, ptr
);
8262 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8264 = build2 (MEM_REF
, vectype
, ptr
,
8265 build_int_cst (ref_type
, 0));
8268 case dr_explicit_realign_optimized
:
8270 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8271 new_temp
= copy_ssa_name (dataref_ptr
);
8273 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8274 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8275 new_stmt
= gimple_build_assign
8276 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8277 build_int_cst (TREE_TYPE (dataref_ptr
),
8278 -(HOST_WIDE_INT
) align
));
8279 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8281 = build2 (MEM_REF
, vectype
, new_temp
,
8282 build_int_cst (ref_type
, 0));
8288 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8289 /* DATA_REF is null if we've already built the statement. */
8292 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8293 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8295 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8296 gimple_set_lhs (new_stmt
, new_temp
);
8297 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8299 /* 3. Handle explicit realignment if necessary/supported.
8301 vec_dest = realign_load (msq, lsq, realignment_token) */
8302 if (alignment_support_scheme
== dr_explicit_realign_optimized
8303 || alignment_support_scheme
== dr_explicit_realign
)
8305 lsq
= gimple_assign_lhs (new_stmt
);
8306 if (!realignment_token
)
8307 realignment_token
= dataref_ptr
;
8308 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8309 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8310 msq
, lsq
, realignment_token
);
8311 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8312 gimple_assign_set_lhs (new_stmt
, new_temp
);
8313 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8315 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8318 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8319 add_phi_arg (phi
, lsq
,
8320 loop_latch_edge (containing_loop
),
8326 /* 4. Handle invariant-load. */
8327 if (inv_p
&& !bb_vinfo
)
8329 gcc_assert (!grouped_load
);
8330 /* If we have versioned for aliasing or the loop doesn't
8331 have any data dependencies that would preclude this,
8332 then we are sure this is a loop invariant load and
8333 thus we can insert it on the preheader edge. */
8334 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8335 && !nested_in_vect_loop
8336 && hoist_defs_of_uses (stmt
, loop
))
8338 if (dump_enabled_p ())
8340 dump_printf_loc (MSG_NOTE
, vect_location
,
8341 "hoisting out of the vectorized "
8343 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8345 tree tem
= copy_ssa_name (scalar_dest
);
8346 gsi_insert_on_edge_immediate
8347 (loop_preheader_edge (loop
),
8348 gimple_build_assign (tem
,
8350 (gimple_assign_rhs1 (stmt
))));
8351 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
8352 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8353 set_vinfo_for_stmt (new_stmt
,
8354 new_stmt_vec_info (new_stmt
, vinfo
));
8358 gimple_stmt_iterator gsi2
= *gsi
;
8360 new_temp
= vect_init_vector (stmt
, scalar_dest
,
8362 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8366 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8368 tree perm_mask
= perm_mask_for_reverse (vectype
);
8369 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8370 perm_mask
, stmt
, gsi
);
8371 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8374 /* Collect vector loads and later create their permutation in
8375 vect_transform_grouped_load (). */
8376 if (grouped_load
|| slp_perm
)
8377 dr_chain
.quick_push (new_temp
);
8379 /* Store vector loads in the corresponding SLP_NODE. */
8380 if (slp
&& !slp_perm
)
8381 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8383 /* With SLP permutation we load the gaps as well, without
8384 we need to skip the gaps after we manage to fully load
8385 all elements. group_gap_adj is GROUP_SIZE here. */
8386 group_elt
+= nunits
;
8387 if (maybe_ne (group_gap_adj
, 0U)
8389 && known_eq (group_elt
, group_size
- group_gap_adj
))
8391 poly_wide_int bump_val
8392 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8394 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8395 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8400 /* Bump the vector pointer to account for a gap or for excess
8401 elements loaded for a permuted SLP load. */
8402 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8404 poly_wide_int bump_val
8405 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8407 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8408 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8413 if (slp
&& !slp_perm
)
8419 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8420 slp_node_instance
, false,
8423 dr_chain
.release ();
8431 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8432 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
8433 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8438 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8440 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8441 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8444 dr_chain
.release ();
8450 /* Function vect_is_simple_cond.
8453 LOOP - the loop that is being vectorized.
8454 COND - Condition that is checked for simple use.
8457 *COMP_VECTYPE - the vector type for the comparison.
8458 *DTS - The def types for the arguments of the comparison
8460 Returns whether a COND can be vectorized. Checks whether
8461 condition operands are supportable using vec_is_simple_use. */
8464 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8465 tree
*comp_vectype
, enum vect_def_type
*dts
,
8469 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8472 if (TREE_CODE (cond
) == SSA_NAME
8473 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8475 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
8476 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
8477 &dts
[0], comp_vectype
)
8479 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8484 if (!COMPARISON_CLASS_P (cond
))
8487 lhs
= TREE_OPERAND (cond
, 0);
8488 rhs
= TREE_OPERAND (cond
, 1);
8490 if (TREE_CODE (lhs
) == SSA_NAME
)
8492 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
8493 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dts
[0], &vectype1
))
8496 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8497 || TREE_CODE (lhs
) == FIXED_CST
)
8498 dts
[0] = vect_constant_def
;
8502 if (TREE_CODE (rhs
) == SSA_NAME
)
8504 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
8505 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dts
[1], &vectype2
))
8508 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8509 || TREE_CODE (rhs
) == FIXED_CST
)
8510 dts
[1] = vect_constant_def
;
8514 if (vectype1
&& vectype2
8515 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8516 TYPE_VECTOR_SUBPARTS (vectype2
)))
8519 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8520 /* Invariant comparison. */
8521 if (! *comp_vectype
)
8523 tree scalar_type
= TREE_TYPE (lhs
);
8524 /* If we can widen the comparison to match vectype do so. */
8525 if (INTEGRAL_TYPE_P (scalar_type
)
8526 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8527 TYPE_SIZE (TREE_TYPE (vectype
))))
8528 scalar_type
= build_nonstandard_integer_type
8529 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8530 TYPE_UNSIGNED (scalar_type
));
8531 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8537 /* vectorizable_condition.
8539 Check if STMT is conditional modify expression that can be vectorized.
8540 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8541 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8544 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8545 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8546 else clause if it is 2).
8548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8551 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8552 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
8555 tree scalar_dest
= NULL_TREE
;
8556 tree vec_dest
= NULL_TREE
;
8557 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8558 tree then_clause
, else_clause
;
8559 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8560 tree comp_vectype
= NULL_TREE
;
8561 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8562 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8565 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8566 enum vect_def_type dts
[4]
8567 = {vect_unknown_def_type
, vect_unknown_def_type
,
8568 vect_unknown_def_type
, vect_unknown_def_type
};
8571 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8572 stmt_vec_info prev_stmt_info
= NULL
;
8574 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8575 vec
<tree
> vec_oprnds0
= vNULL
;
8576 vec
<tree
> vec_oprnds1
= vNULL
;
8577 vec
<tree
> vec_oprnds2
= vNULL
;
8578 vec
<tree
> vec_oprnds3
= vNULL
;
8580 bool masked
= false;
8582 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8585 vect_reduction_type reduction_type
8586 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8587 if (reduction_type
== TREE_CODE_REDUCTION
)
8589 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8592 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8593 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8597 /* FORNOW: not yet supported. */
8598 if (STMT_VINFO_LIVE_P (stmt_info
))
8600 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8602 "value used after loop.\n");
8607 /* Is vectorizable conditional operation? */
8608 if (!is_gimple_assign (stmt
))
8611 code
= gimple_assign_rhs_code (stmt
);
8613 if (code
!= COND_EXPR
)
8616 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8617 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8622 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8624 gcc_assert (ncopies
>= 1);
8625 if (reduc_index
&& ncopies
> 1)
8626 return false; /* FORNOW */
8628 cond_expr
= gimple_assign_rhs1 (stmt
);
8629 then_clause
= gimple_assign_rhs2 (stmt
);
8630 else_clause
= gimple_assign_rhs3 (stmt
);
8632 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8633 &comp_vectype
, &dts
[0], vectype
)
8638 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[2],
8641 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dts
[3],
8645 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8648 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8651 masked
= !COMPARISON_CLASS_P (cond_expr
);
8652 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8654 if (vec_cmp_type
== NULL_TREE
)
8657 cond_code
= TREE_CODE (cond_expr
);
8660 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8661 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8664 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8666 /* Boolean values may have another representation in vectors
8667 and therefore we prefer bit operations over comparison for
8668 them (which also works for scalar masks). We store opcodes
8669 to use in bitop1 and bitop2. Statement is vectorized as
8670 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8671 depending on bitop1 and bitop2 arity. */
8675 bitop1
= BIT_NOT_EXPR
;
8676 bitop2
= BIT_AND_EXPR
;
8679 bitop1
= BIT_NOT_EXPR
;
8680 bitop2
= BIT_IOR_EXPR
;
8683 bitop1
= BIT_NOT_EXPR
;
8684 bitop2
= BIT_AND_EXPR
;
8685 std::swap (cond_expr0
, cond_expr1
);
8688 bitop1
= BIT_NOT_EXPR
;
8689 bitop2
= BIT_IOR_EXPR
;
8690 std::swap (cond_expr0
, cond_expr1
);
8693 bitop1
= BIT_XOR_EXPR
;
8696 bitop1
= BIT_XOR_EXPR
;
8697 bitop2
= BIT_NOT_EXPR
;
8702 cond_code
= SSA_NAME
;
8707 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8708 if (bitop1
!= NOP_EXPR
)
8710 machine_mode mode
= TYPE_MODE (comp_vectype
);
8713 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8714 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8717 if (bitop2
!= NOP_EXPR
)
8719 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8721 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8725 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8729 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, NULL
, NULL
);
8739 vec_oprnds0
.create (1);
8740 vec_oprnds1
.create (1);
8741 vec_oprnds2
.create (1);
8742 vec_oprnds3
.create (1);
8746 scalar_dest
= gimple_assign_lhs (stmt
);
8747 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8748 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8750 /* Handle cond expr. */
8751 for (j
= 0; j
< ncopies
; j
++)
8753 gimple
*new_stmt
= NULL
;
8758 auto_vec
<tree
, 4> ops
;
8759 auto_vec
<vec
<tree
>, 4> vec_defs
;
8762 ops
.safe_push (cond_expr
);
8765 ops
.safe_push (cond_expr0
);
8766 ops
.safe_push (cond_expr1
);
8768 ops
.safe_push (then_clause
);
8769 ops
.safe_push (else_clause
);
8770 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8771 vec_oprnds3
= vec_defs
.pop ();
8772 vec_oprnds2
= vec_defs
.pop ();
8774 vec_oprnds1
= vec_defs
.pop ();
8775 vec_oprnds0
= vec_defs
.pop ();
8783 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8785 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
8791 = vect_get_vec_def_for_operand (cond_expr0
,
8792 stmt
, comp_vectype
);
8793 vect_is_simple_use (cond_expr0
, loop_vinfo
, >emp
, &dts
[0]);
8796 = vect_get_vec_def_for_operand (cond_expr1
,
8797 stmt
, comp_vectype
);
8798 vect_is_simple_use (cond_expr1
, loop_vinfo
, >emp
, &dts
[1]);
8800 if (reduc_index
== 1)
8801 vec_then_clause
= reduc_def
;
8804 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8806 vect_is_simple_use (then_clause
, loop_vinfo
,
8809 if (reduc_index
== 2)
8810 vec_else_clause
= reduc_def
;
8813 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8815 vect_is_simple_use (else_clause
, loop_vinfo
, >emp
, &dts
[3]);
8822 = vect_get_vec_def_for_stmt_copy (dts
[0],
8823 vec_oprnds0
.pop ());
8826 = vect_get_vec_def_for_stmt_copy (dts
[1],
8827 vec_oprnds1
.pop ());
8829 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8830 vec_oprnds2
.pop ());
8831 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8832 vec_oprnds3
.pop ());
8837 vec_oprnds0
.quick_push (vec_cond_lhs
);
8839 vec_oprnds1
.quick_push (vec_cond_rhs
);
8840 vec_oprnds2
.quick_push (vec_then_clause
);
8841 vec_oprnds3
.quick_push (vec_else_clause
);
8844 /* Arguments are ready. Create the new vector stmt. */
8845 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8847 vec_then_clause
= vec_oprnds2
[i
];
8848 vec_else_clause
= vec_oprnds3
[i
];
8851 vec_compare
= vec_cond_lhs
;
8854 vec_cond_rhs
= vec_oprnds1
[i
];
8855 if (bitop1
== NOP_EXPR
)
8856 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8857 vec_cond_lhs
, vec_cond_rhs
);
8860 new_temp
= make_ssa_name (vec_cmp_type
);
8861 if (bitop1
== BIT_NOT_EXPR
)
8862 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8866 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8868 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8869 if (bitop2
== NOP_EXPR
)
8870 vec_compare
= new_temp
;
8871 else if (bitop2
== BIT_NOT_EXPR
)
8873 /* Instead of doing ~x ? y : z do x ? z : y. */
8874 vec_compare
= new_temp
;
8875 std::swap (vec_then_clause
, vec_else_clause
);
8879 vec_compare
= make_ssa_name (vec_cmp_type
);
8881 = gimple_build_assign (vec_compare
, bitop2
,
8882 vec_cond_lhs
, new_temp
);
8883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8887 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
8889 if (!is_gimple_val (vec_compare
))
8891 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
8892 new_stmt
= gimple_build_assign (vec_compare_name
,
8894 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8895 vec_compare
= vec_compare_name
;
8897 gcc_assert (reduc_index
== 2);
8898 new_stmt
= gimple_build_call_internal
8899 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
8901 gimple_call_set_lhs (new_stmt
, scalar_dest
);
8902 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
8903 if (stmt
== gsi_stmt (*gsi
))
8904 vect_finish_replace_stmt (stmt
, new_stmt
);
8907 /* In this case we're moving the definition to later in the
8908 block. That doesn't matter because the only uses of the
8909 lhs are in phi statements. */
8910 gimple_stmt_iterator old_gsi
= gsi_for_stmt (stmt
);
8911 gsi_remove (&old_gsi
, true);
8912 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8917 new_temp
= make_ssa_name (vec_dest
);
8918 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
8919 vec_compare
, vec_then_clause
,
8921 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8924 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8931 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8933 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8935 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8938 vec_oprnds0
.release ();
8939 vec_oprnds1
.release ();
8940 vec_oprnds2
.release ();
8941 vec_oprnds3
.release ();
8946 /* vectorizable_comparison.
8948 Check if STMT is comparison expression that can be vectorized.
8949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8950 comparison, put it in VEC_STMT, and insert it at GSI.
8952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8955 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8956 gimple
**vec_stmt
, tree reduc_def
,
8959 tree lhs
, rhs1
, rhs2
;
8960 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8961 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8963 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
8965 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8966 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
8970 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8971 stmt_vec_info prev_stmt_info
= NULL
;
8973 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8974 vec
<tree
> vec_oprnds0
= vNULL
;
8975 vec
<tree
> vec_oprnds1
= vNULL
;
8980 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8983 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
8986 mask_type
= vectype
;
8987 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8992 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8994 gcc_assert (ncopies
>= 1);
8995 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8996 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9000 if (STMT_VINFO_LIVE_P (stmt_info
))
9002 if (dump_enabled_p ())
9003 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9004 "value used after loop.\n");
9008 if (!is_gimple_assign (stmt
))
9011 code
= gimple_assign_rhs_code (stmt
);
9013 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9016 rhs1
= gimple_assign_rhs1 (stmt
);
9017 rhs2
= gimple_assign_rhs2 (stmt
);
9019 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
9020 &dts
[0], &vectype1
))
9023 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
9024 &dts
[1], &vectype2
))
9027 if (vectype1
&& vectype2
9028 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9029 TYPE_VECTOR_SUBPARTS (vectype2
)))
9032 vectype
= vectype1
? vectype1
: vectype2
;
9034 /* Invariant comparison. */
9037 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9038 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9041 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9044 /* Can't compare mask and non-mask types. */
9045 if (vectype1
&& vectype2
9046 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
9049 /* Boolean values may have another representation in vectors
9050 and therefore we prefer bit operations over comparison for
9051 them (which also works for scalar masks). We store opcodes
9052 to use in bitop1 and bitop2. Statement is vectorized as
9053 BITOP2 (rhs1 BITOP1 rhs2) or
9054 rhs1 BITOP2 (BITOP1 rhs2)
9055 depending on bitop1 and bitop2 arity. */
9056 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9058 if (code
== GT_EXPR
)
9060 bitop1
= BIT_NOT_EXPR
;
9061 bitop2
= BIT_AND_EXPR
;
9063 else if (code
== GE_EXPR
)
9065 bitop1
= BIT_NOT_EXPR
;
9066 bitop2
= BIT_IOR_EXPR
;
9068 else if (code
== LT_EXPR
)
9070 bitop1
= BIT_NOT_EXPR
;
9071 bitop2
= BIT_AND_EXPR
;
9072 std::swap (rhs1
, rhs2
);
9073 std::swap (dts
[0], dts
[1]);
9075 else if (code
== LE_EXPR
)
9077 bitop1
= BIT_NOT_EXPR
;
9078 bitop2
= BIT_IOR_EXPR
;
9079 std::swap (rhs1
, rhs2
);
9080 std::swap (dts
[0], dts
[1]);
9084 bitop1
= BIT_XOR_EXPR
;
9085 if (code
== EQ_EXPR
)
9086 bitop2
= BIT_NOT_EXPR
;
9092 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9094 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9095 dts
, ndts
, NULL
, NULL
);
9096 if (bitop1
== NOP_EXPR
)
9097 return expand_vec_cmp_expr_p (vectype
, mask_type
, code
);
9100 machine_mode mode
= TYPE_MODE (vectype
);
9103 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9104 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9107 if (bitop2
!= NOP_EXPR
)
9109 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9110 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9120 vec_oprnds0
.create (1);
9121 vec_oprnds1
.create (1);
9125 lhs
= gimple_assign_lhs (stmt
);
9126 mask
= vect_create_destination_var (lhs
, mask_type
);
9128 /* Handle cmp expr. */
9129 for (j
= 0; j
< ncopies
; j
++)
9131 gassign
*new_stmt
= NULL
;
9136 auto_vec
<tree
, 2> ops
;
9137 auto_vec
<vec
<tree
>, 2> vec_defs
;
9139 ops
.safe_push (rhs1
);
9140 ops
.safe_push (rhs2
);
9141 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9142 vec_oprnds1
= vec_defs
.pop ();
9143 vec_oprnds0
= vec_defs
.pop ();
9147 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
9148 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
9153 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
9154 vec_oprnds0
.pop ());
9155 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
9156 vec_oprnds1
.pop ());
9161 vec_oprnds0
.quick_push (vec_rhs1
);
9162 vec_oprnds1
.quick_push (vec_rhs2
);
9165 /* Arguments are ready. Create the new vector stmt. */
9166 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9168 vec_rhs2
= vec_oprnds1
[i
];
9170 new_temp
= make_ssa_name (mask
);
9171 if (bitop1
== NOP_EXPR
)
9173 new_stmt
= gimple_build_assign (new_temp
, code
,
9174 vec_rhs1
, vec_rhs2
);
9175 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9179 if (bitop1
== BIT_NOT_EXPR
)
9180 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9182 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9184 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9185 if (bitop2
!= NOP_EXPR
)
9187 tree res
= make_ssa_name (mask
);
9188 if (bitop2
== BIT_NOT_EXPR
)
9189 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9191 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9193 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9197 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9204 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
9206 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
9208 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
9211 vec_oprnds0
.release ();
9212 vec_oprnds1
.release ();
9217 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9218 can handle all live statements in the node. Otherwise return true
9219 if STMT is not live or if vectorizable_live_operation can handle it.
9220 GSI and VEC_STMT are as for vectorizable_live_operation. */
9223 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9224 slp_tree slp_node
, gimple
**vec_stmt
)
9230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
9232 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
9233 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9234 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
9239 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
9240 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
))
9246 /* Make sure the statement is vectorizable. */
9249 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
9250 slp_instance node_instance
)
9252 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9253 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9254 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9256 gimple
*pattern_stmt
;
9257 gimple_seq pattern_def_seq
;
9259 if (dump_enabled_p ())
9261 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
9262 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9265 if (gimple_has_volatile_ops (stmt
))
9267 if (dump_enabled_p ())
9268 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9269 "not vectorized: stmt has volatile operands\n");
9274 /* Skip stmts that do not need to be vectorized. In loops this is expected
9276 - the COND_EXPR which is the loop exit condition
9277 - any LABEL_EXPRs in the loop
9278 - computations that are used only for array indexing or loop control.
9279 In basic blocks we only analyze statements that are a part of some SLP
9280 instance, therefore, all the statements are relevant.
9282 Pattern statement needs to be analyzed instead of the original statement
9283 if the original statement is not relevant. Otherwise, we analyze both
9284 statements. In basic blocks we are called from some SLP instance
9285 traversal, don't analyze pattern stmts instead, the pattern stmts
9286 already will be part of SLP instance. */
9288 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
9289 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9290 && !STMT_VINFO_LIVE_P (stmt_info
))
9292 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9294 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9295 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9297 /* Analyze PATTERN_STMT instead of the original stmt. */
9298 stmt
= pattern_stmt
;
9299 stmt_info
= vinfo_for_stmt (pattern_stmt
);
9300 if (dump_enabled_p ())
9302 dump_printf_loc (MSG_NOTE
, vect_location
,
9303 "==> examining pattern statement: ");
9304 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9309 if (dump_enabled_p ())
9310 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9315 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9321 /* Analyze PATTERN_STMT too. */
9322 if (dump_enabled_p ())
9324 dump_printf_loc (MSG_NOTE
, vect_location
,
9325 "==> examining pattern statement: ");
9326 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9329 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
9334 if (is_pattern_stmt_p (stmt_info
)
9336 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9338 gimple_stmt_iterator si
;
9340 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9342 gimple
*pattern_def_stmt
= gsi_stmt (si
);
9343 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
9344 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
9346 /* Analyze def stmt of STMT if it's a pattern stmt. */
9347 if (dump_enabled_p ())
9349 dump_printf_loc (MSG_NOTE
, vect_location
,
9350 "==> examining pattern def statement: ");
9351 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
9354 if (!vect_analyze_stmt (pattern_def_stmt
,
9355 need_to_vectorize
, node
, node_instance
))
9361 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9363 case vect_internal_def
:
9366 case vect_reduction_def
:
9367 case vect_nested_cycle
:
9368 gcc_assert (!bb_vinfo
9369 && (relevance
== vect_used_in_outer
9370 || relevance
== vect_used_in_outer_by_reduction
9371 || relevance
== vect_used_by_reduction
9372 || relevance
== vect_unused_in_scope
9373 || relevance
== vect_used_only_live
));
9376 case vect_induction_def
:
9377 gcc_assert (!bb_vinfo
);
9380 case vect_constant_def
:
9381 case vect_external_def
:
9382 case vect_unknown_def_type
:
9387 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9389 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
9390 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9391 || (is_gimple_call (stmt
)
9392 && gimple_call_lhs (stmt
) == NULL_TREE
));
9393 *need_to_vectorize
= true;
9396 if (PURE_SLP_STMT (stmt_info
) && !node
)
9398 dump_printf_loc (MSG_NOTE
, vect_location
,
9399 "handled only by SLP analysis\n");
9405 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9406 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9407 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
9408 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
9409 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
9410 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
9411 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
9412 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
9413 || vectorizable_call (stmt
, NULL
, NULL
, node
)
9414 || vectorizable_store (stmt
, NULL
, NULL
, node
)
9415 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
)
9416 || vectorizable_induction (stmt
, NULL
, NULL
, node
)
9417 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
9418 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
9422 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
9423 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
9424 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
9425 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
9426 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
9427 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
9428 || vectorizable_call (stmt
, NULL
, NULL
, node
)
9429 || vectorizable_store (stmt
, NULL
, NULL
, node
)
9430 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
9431 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
9436 if (dump_enabled_p ())
9438 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9439 "not vectorized: relevant stmt not ");
9440 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
9441 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9450 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9451 need extra handling, except for vectorizable reductions. */
9452 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9453 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
))
9455 if (dump_enabled_p ())
9457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9458 "not vectorized: live stmt not supported: ");
9459 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9469 /* Function vect_transform_stmt.
9471 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9474 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9475 bool *grouped_store
, slp_tree slp_node
,
9476 slp_instance slp_node_instance
)
9478 bool is_store
= false;
9479 gimple
*vec_stmt
= NULL
;
9480 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9483 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9484 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9486 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9487 && nested_in_vect_loop_p
9488 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9491 switch (STMT_VINFO_TYPE (stmt_info
))
9493 case type_demotion_vec_info_type
:
9494 case type_promotion_vec_info_type
:
9495 case type_conversion_vec_info_type
:
9496 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
9500 case induc_vec_info_type
:
9501 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
);
9505 case shift_vec_info_type
:
9506 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
9510 case op_vec_info_type
:
9511 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
9515 case assignment_vec_info_type
:
9516 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
9520 case load_vec_info_type
:
9521 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
9526 case store_vec_info_type
:
9527 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
9529 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9531 /* In case of interleaving, the whole chain is vectorized when the
9532 last store in the chain is reached. Store stmts before the last
9533 one are skipped, and there vec_stmt_info shouldn't be freed
9535 *grouped_store
= true;
9536 stmt_vec_info group_info
9537 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
9538 if (GROUP_STORE_COUNT (group_info
) == GROUP_SIZE (group_info
))
9545 case condition_vec_info_type
:
9546 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
9550 case comparison_vec_info_type
:
9551 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
9555 case call_vec_info_type
:
9556 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
9557 stmt
= gsi_stmt (*gsi
);
9560 case call_simd_clone_vec_info_type
:
9561 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
9562 stmt
= gsi_stmt (*gsi
);
9565 case reduc_vec_info_type
:
9566 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
9572 if (!STMT_VINFO_LIVE_P (stmt_info
))
9574 if (dump_enabled_p ())
9575 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9576 "stmt not supported.\n");
9581 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9582 This would break hybrid SLP vectorization. */
9584 gcc_assert (!vec_stmt
9585 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
9587 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9588 is being vectorized, but outside the immediately enclosing loop. */
9591 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9592 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9593 || STMT_VINFO_RELEVANT (stmt_info
) ==
9594 vect_used_in_outer_by_reduction
))
9596 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9597 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9598 imm_use_iterator imm_iter
;
9599 use_operand_p use_p
;
9603 if (dump_enabled_p ())
9604 dump_printf_loc (MSG_NOTE
, vect_location
,
9605 "Record the vdef for outer-loop vectorization.\n");
9607 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9608 (to be used when vectorizing outer-loop stmts that use the DEF of
9610 if (gimple_code (stmt
) == GIMPLE_PHI
)
9611 scalar_dest
= PHI_RESULT (stmt
);
9613 scalar_dest
= gimple_assign_lhs (stmt
);
9615 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9617 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9619 exit_phi
= USE_STMT (use_p
);
9620 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
9625 /* Handle stmts whose DEF is used outside the loop-nest that is
9626 being vectorized. */
9627 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9629 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
);
9634 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9640 /* Remove a group of stores (for SLP or interleaving), free their
9644 vect_remove_stores (gimple
*first_stmt
)
9646 gimple
*next
= first_stmt
;
9648 gimple_stmt_iterator next_si
;
9652 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
9654 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
9655 if (is_pattern_stmt_p (stmt_info
))
9656 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
9657 /* Free the attached stmt_vec_info and remove the stmt. */
9658 next_si
= gsi_for_stmt (next
);
9659 unlink_stmt_vdef (next
);
9660 gsi_remove (&next_si
, true);
9661 release_defs (next
);
9662 free_stmt_vec_info (next
);
9668 /* Function new_stmt_vec_info.
9670 Create and initialize a new stmt_vec_info struct for STMT. */
9673 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
9676 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
9678 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
9679 STMT_VINFO_STMT (res
) = stmt
;
9681 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
9682 STMT_VINFO_LIVE_P (res
) = false;
9683 STMT_VINFO_VECTYPE (res
) = NULL
;
9684 STMT_VINFO_VEC_STMT (res
) = NULL
;
9685 STMT_VINFO_VECTORIZABLE (res
) = true;
9686 STMT_VINFO_IN_PATTERN_P (res
) = false;
9687 STMT_VINFO_RELATED_STMT (res
) = NULL
;
9688 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9689 STMT_VINFO_DATA_REF (res
) = NULL
;
9690 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9691 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9693 if (gimple_code (stmt
) == GIMPLE_PHI
9694 && is_loop_header_bb_p (gimple_bb (stmt
)))
9695 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9697 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9699 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9700 STMT_SLP_TYPE (res
) = loop_vect
;
9701 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9703 GROUP_FIRST_ELEMENT (res
) = NULL
;
9704 GROUP_NEXT_ELEMENT (res
) = NULL
;
9705 GROUP_SIZE (res
) = 0;
9706 GROUP_STORE_COUNT (res
) = 0;
9707 GROUP_GAP (res
) = 0;
9708 GROUP_SAME_DR_STMT (res
) = NULL
;
9714 /* Create a hash table for stmt_vec_info. */
9717 init_stmt_vec_info_vec (void)
9719 gcc_assert (!stmt_vec_info_vec
.exists ());
9720 stmt_vec_info_vec
.create (50);
9724 /* Free hash table for stmt_vec_info. */
9727 free_stmt_vec_info_vec (void)
9731 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
9733 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9734 gcc_assert (stmt_vec_info_vec
.exists ());
9735 stmt_vec_info_vec
.release ();
9739 /* Free stmt vectorization related info. */
9742 free_stmt_vec_info (gimple
*stmt
)
9744 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9749 /* Check if this statement has a related "pattern stmt"
9750 (introduced by the vectorizer during the pattern recognition
9751 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9753 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9755 stmt_vec_info patt_info
9756 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9759 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
9760 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9761 gimple_set_bb (patt_stmt
, NULL
);
9762 tree lhs
= gimple_get_lhs (patt_stmt
);
9763 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9764 release_ssa_name (lhs
);
9767 gimple_stmt_iterator si
;
9768 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
9770 gimple
*seq_stmt
= gsi_stmt (si
);
9771 gimple_set_bb (seq_stmt
, NULL
);
9772 lhs
= gimple_get_lhs (seq_stmt
);
9773 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9774 release_ssa_name (lhs
);
9775 free_stmt_vec_info (seq_stmt
);
9778 free_stmt_vec_info (patt_stmt
);
9782 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9783 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9784 set_vinfo_for_stmt (stmt
, NULL
);
9789 /* Function get_vectype_for_scalar_type_and_size.
9791 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9795 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9797 tree orig_scalar_type
= scalar_type
;
9798 scalar_mode inner_mode
;
9799 machine_mode simd_mode
;
9803 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9804 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9807 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9809 /* For vector types of elements whose mode precision doesn't
9810 match their types precision we use a element type of mode
9811 precision. The vectorization routines will have to make sure
9812 they support the proper result truncation/extension.
9813 We also make sure to build vector types with INTEGER_TYPE
9814 component type only. */
9815 if (INTEGRAL_TYPE_P (scalar_type
)
9816 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9817 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9818 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9819 TYPE_UNSIGNED (scalar_type
));
9821 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9822 When the component mode passes the above test simply use a type
9823 corresponding to that mode. The theory is that any use that
9824 would cause problems with this will disable vectorization anyway. */
9825 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9826 && !INTEGRAL_TYPE_P (scalar_type
))
9827 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9829 /* We can't build a vector type of elements with alignment bigger than
9831 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9832 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9833 TYPE_UNSIGNED (scalar_type
));
9835 /* If we felt back to using the mode fail if there was
9836 no scalar type for it. */
9837 if (scalar_type
== NULL_TREE
)
9840 /* If no size was supplied use the mode the target prefers. Otherwise
9841 lookup a vector mode of the specified size. */
9842 if (known_eq (size
, 0U))
9843 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9844 else if (!multiple_p (size
, nbytes
, &nunits
)
9845 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9847 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9848 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9851 vectype
= build_vector_type (scalar_type
, nunits
);
9853 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9854 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9857 /* Re-attach the address-space qualifier if we canonicalized the scalar
9859 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9860 return build_qualified_type
9861 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9866 poly_uint64 current_vector_size
;
9868 /* Function get_vectype_for_scalar_type.
9870 Returns the vector type corresponding to SCALAR_TYPE as supported
9874 get_vectype_for_scalar_type (tree scalar_type
)
9877 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9878 current_vector_size
);
9880 && known_eq (current_vector_size
, 0U))
9881 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9885 /* Function get_mask_type_for_scalar_type.
9887 Returns the mask type corresponding to a result of comparison
9888 of vectors of specified SCALAR_TYPE as supported by target. */
9891 get_mask_type_for_scalar_type (tree scalar_type
)
9893 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9898 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9899 current_vector_size
);
9902 /* Function get_same_sized_vectype
9904 Returns a vector type corresponding to SCALAR_TYPE of size
9905 VECTOR_TYPE if supported by the target. */
9908 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9910 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9911 return build_same_sized_truth_vector_type (vector_type
);
9913 return get_vectype_for_scalar_type_and_size
9914 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9917 /* Function vect_is_simple_use.
9920 VINFO - the vect info of the loop or basic block that is being vectorized.
9921 OPERAND - operand in the loop or bb.
9923 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9924 DT - the type of definition
9926 Returns whether a stmt with OPERAND can be vectorized.
9927 For loops, supportable operands are constants, loop invariants, and operands
9928 that are defined by the current iteration of the loop. Unsupportable
9929 operands are those that are defined by a previous iteration of the loop (as
9930 is the case in reduction/induction computations).
9931 For basic blocks, supportable operands are constants and bb invariants.
9932 For now, operands defined outside the basic block are not supported. */
9935 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
9936 gimple
**def_stmt
, enum vect_def_type
*dt
)
9939 *dt
= vect_unknown_def_type
;
9941 if (dump_enabled_p ())
9943 dump_printf_loc (MSG_NOTE
, vect_location
,
9944 "vect_is_simple_use: operand ");
9945 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9946 dump_printf (MSG_NOTE
, "\n");
9949 if (CONSTANT_CLASS_P (operand
))
9951 *dt
= vect_constant_def
;
9955 if (is_gimple_min_invariant (operand
))
9957 *dt
= vect_external_def
;
9961 if (TREE_CODE (operand
) != SSA_NAME
)
9963 if (dump_enabled_p ())
9964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9969 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9971 *dt
= vect_external_def
;
9975 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
9976 if (dump_enabled_p ())
9978 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
9979 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
9982 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
9983 *dt
= vect_external_def
;
9986 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
9987 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9990 if (dump_enabled_p ())
9992 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
9995 case vect_uninitialized_def
:
9996 dump_printf (MSG_NOTE
, "uninitialized\n");
9998 case vect_constant_def
:
9999 dump_printf (MSG_NOTE
, "constant\n");
10001 case vect_external_def
:
10002 dump_printf (MSG_NOTE
, "external\n");
10004 case vect_internal_def
:
10005 dump_printf (MSG_NOTE
, "internal\n");
10007 case vect_induction_def
:
10008 dump_printf (MSG_NOTE
, "induction\n");
10010 case vect_reduction_def
:
10011 dump_printf (MSG_NOTE
, "reduction\n");
10013 case vect_double_reduction_def
:
10014 dump_printf (MSG_NOTE
, "double reduction\n");
10016 case vect_nested_cycle
:
10017 dump_printf (MSG_NOTE
, "nested cycle\n");
10019 case vect_unknown_def_type
:
10020 dump_printf (MSG_NOTE
, "unknown\n");
10025 if (*dt
== vect_unknown_def_type
)
10027 if (dump_enabled_p ())
10028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10029 "Unsupported pattern.\n");
10033 switch (gimple_code (*def_stmt
))
10036 case GIMPLE_ASSIGN
:
10040 if (dump_enabled_p ())
10041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10042 "unsupported defining stmt:\n");
10049 /* Function vect_is_simple_use.
10051 Same as vect_is_simple_use but also determines the vector operand
10052 type of OPERAND and stores it to *VECTYPE. If the definition of
10053 OPERAND is vect_uninitialized_def, vect_constant_def or
10054 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10055 is responsible to compute the best suited vector type for the
10059 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
10060 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
10062 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
10065 /* Now get a vector type if the def is internal, otherwise supply
10066 NULL_TREE and leave it up to the caller to figure out a proper
10067 type for the use stmt. */
10068 if (*dt
== vect_internal_def
10069 || *dt
== vect_induction_def
10070 || *dt
== vect_reduction_def
10071 || *dt
== vect_double_reduction_def
10072 || *dt
== vect_nested_cycle
)
10074 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
10076 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10077 && !STMT_VINFO_RELEVANT (stmt_info
)
10078 && !STMT_VINFO_LIVE_P (stmt_info
))
10079 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
10081 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10082 gcc_assert (*vectype
!= NULL_TREE
);
10084 else if (*dt
== vect_uninitialized_def
10085 || *dt
== vect_constant_def
10086 || *dt
== vect_external_def
)
10087 *vectype
= NULL_TREE
;
10089 gcc_unreachable ();
10095 /* Function supportable_widening_operation
10097 Check whether an operation represented by the code CODE is a
10098 widening operation that is supported by the target platform in
10099 vector form (i.e., when operating on arguments of type VECTYPE_IN
10100 producing a result of type VECTYPE_OUT).
10102 Widening operations we currently support are NOP (CONVERT), FLOAT
10103 and WIDEN_MULT. This function checks if these operations are supported
10104 by the target platform either directly (via vector tree-codes), or via
10108 - CODE1 and CODE2 are codes of vector operations to be used when
10109 vectorizing the operation, if available.
10110 - MULTI_STEP_CVT determines the number of required intermediate steps in
10111 case of multi-step conversion (like char->short->int - in that case
10112 MULTI_STEP_CVT will be 1).
10113 - INTERM_TYPES contains the intermediate type required to perform the
10114 widening operation (short in the above example). */
10117 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
10118 tree vectype_out
, tree vectype_in
,
10119 enum tree_code
*code1
, enum tree_code
*code2
,
10120 int *multi_step_cvt
,
10121 vec
<tree
> *interm_types
)
10123 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
10124 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10125 struct loop
*vect_loop
= NULL
;
10126 machine_mode vec_mode
;
10127 enum insn_code icode1
, icode2
;
10128 optab optab1
, optab2
;
10129 tree vectype
= vectype_in
;
10130 tree wide_vectype
= vectype_out
;
10131 enum tree_code c1
, c2
;
10133 tree prev_type
, intermediate_type
;
10134 machine_mode intermediate_mode
, prev_mode
;
10135 optab optab3
, optab4
;
10137 *multi_step_cvt
= 0;
10139 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
10143 case WIDEN_MULT_EXPR
:
10144 /* The result of a vectorized widening operation usually requires
10145 two vectors (because the widened results do not fit into one vector).
10146 The generated vector results would normally be expected to be
10147 generated in the same order as in the original scalar computation,
10148 i.e. if 8 results are generated in each vector iteration, they are
10149 to be organized as follows:
10150 vect1: [res1,res2,res3,res4],
10151 vect2: [res5,res6,res7,res8].
10153 However, in the special case that the result of the widening
10154 operation is used in a reduction computation only, the order doesn't
10155 matter (because when vectorizing a reduction we change the order of
10156 the computation). Some targets can take advantage of this and
10157 generate more efficient code. For example, targets like Altivec,
10158 that support widen_mult using a sequence of {mult_even,mult_odd}
10159 generate the following vectors:
10160 vect1: [res1,res3,res5,res7],
10161 vect2: [res2,res4,res6,res8].
10163 When vectorizing outer-loops, we execute the inner-loop sequentially
10164 (each vectorized inner-loop iteration contributes to VF outer-loop
10165 iterations in parallel). We therefore don't allow to change the
10166 order of the computation in the inner-loop during outer-loop
10168 /* TODO: Another case in which order doesn't *really* matter is when we
10169 widen and then contract again, e.g. (short)((int)x * y >> 8).
10170 Normally, pack_trunc performs an even/odd permute, whereas the
10171 repack from an even/odd expansion would be an interleave, which
10172 would be significantly simpler for e.g. AVX2. */
10173 /* In any case, in order to avoid duplicating the code below, recurse
10174 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10175 are properly set up for the caller. If we fail, we'll continue with
10176 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10178 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
10179 && !nested_in_vect_loop_p (vect_loop
, stmt
)
10180 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
10181 stmt
, vectype_out
, vectype_in
,
10182 code1
, code2
, multi_step_cvt
,
10185 /* Elements in a vector with vect_used_by_reduction property cannot
10186 be reordered if the use chain with this property does not have the
10187 same operation. One such an example is s += a * b, where elements
10188 in a and b cannot be reordered. Here we check if the vector defined
10189 by STMT is only directly used in the reduction statement. */
10190 tree lhs
= gimple_assign_lhs (stmt
);
10191 use_operand_p dummy
;
10193 stmt_vec_info use_stmt_info
= NULL
;
10194 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
10195 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
10196 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
10199 c1
= VEC_WIDEN_MULT_LO_EXPR
;
10200 c2
= VEC_WIDEN_MULT_HI_EXPR
;
10203 case DOT_PROD_EXPR
:
10204 c1
= DOT_PROD_EXPR
;
10205 c2
= DOT_PROD_EXPR
;
10213 case VEC_WIDEN_MULT_EVEN_EXPR
:
10214 /* Support the recursion induced just above. */
10215 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
10216 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
10219 case WIDEN_LSHIFT_EXPR
:
10220 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
10221 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
10225 c1
= VEC_UNPACK_LO_EXPR
;
10226 c2
= VEC_UNPACK_HI_EXPR
;
10230 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
10231 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
10234 case FIX_TRUNC_EXPR
:
10235 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10236 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10237 computing the operation. */
10241 gcc_unreachable ();
10244 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
10245 std::swap (c1
, c2
);
10247 if (code
== FIX_TRUNC_EXPR
)
10249 /* The signedness is determined from output operand. */
10250 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10251 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
10255 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10256 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
10259 if (!optab1
|| !optab2
)
10262 vec_mode
= TYPE_MODE (vectype
);
10263 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
10264 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
10270 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10271 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10272 /* For scalar masks we may have different boolean
10273 vector types having the same QImode. Thus we
10274 add additional check for elements number. */
10275 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10276 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
10277 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10279 /* Check if it's a multi-step conversion that can be done using intermediate
10282 prev_type
= vectype
;
10283 prev_mode
= vec_mode
;
10285 if (!CONVERT_EXPR_CODE_P (code
))
10288 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10289 intermediate steps in promotion sequence. We try
10290 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10292 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10293 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10295 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10296 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10298 intermediate_type
= vect_halve_mask_nunits (prev_type
);
10299 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10304 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
10305 TYPE_UNSIGNED (prev_type
));
10307 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10308 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
10310 if (!optab3
|| !optab4
10311 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
10312 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10313 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
10314 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
10315 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
10316 == CODE_FOR_nothing
)
10317 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
10318 == CODE_FOR_nothing
))
10321 interm_types
->quick_push (intermediate_type
);
10322 (*multi_step_cvt
)++;
10324 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10325 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10326 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10327 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
10328 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10330 prev_type
= intermediate_type
;
10331 prev_mode
= intermediate_mode
;
10334 interm_types
->release ();
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).   */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      /* NOTE(review): icode2 is taken from OPTAB1 here, which was already
	 probed into icode1 above, making the mode comparison below look
	 trivially true; presumably INTERM_OPTAB was intended — TODO confirm
	 against upstream before changing.  */
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  /* Prefer the signed intermediate conversion from here on.  */
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  /* Boolean vectors double their element count on each narrowing
	     step; the chosen mode must agree with the mask type.  */
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      /* Both the step into INTERMEDIATE_MODE and the pack out of it must
	 be supported by the target, or the chain stops here.  */
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	/* For scalar masks we may have different boolean
	   vector types having the same QImode.  Thus we
	   add additional check for elements number.  */
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
10493 /* Generate and return a statement that sets vector mask MASK such that
10494 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10497 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10499 tree cmp_type
= TREE_TYPE (start_index
);
10500 tree mask_type
= TREE_TYPE (mask
);
10501 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10502 cmp_type
, mask_type
,
10503 OPTIMIZE_FOR_SPEED
));
10504 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10505 start_index
, end_index
,
10506 build_zero_cst (mask_type
));
10507 gimple_call_set_lhs (call
, mask
);
10511 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10512 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10515 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10518 tree tmp
= make_ssa_name (mask_type
);
10519 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10520 gimple_seq_add_stmt (seq
, call
);
10521 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);