1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
64 return STMT_VINFO_VECTYPE (stmt_info
);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
82 return (bb
->loop_father
== loop
->inner
);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
94 if ((kind
== vector_load
|| kind
== unaligned_load
)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
96 kind
= vector_gather_load
;
97 if ((kind
== vector_store
|| kind
== unaligned_store
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_scatter_store
;
101 stmt_info_for_cost si
= { count
, kind
, where
,
102 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
104 body_cost_vec
->safe_push (si
);
106 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
108 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
116 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
126 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
127 tree array
, unsigned HOST_WIDE_INT n
)
129 tree vect_type
, vect
, vect_name
, array_ref
;
132 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
133 vect_type
= TREE_TYPE (TREE_TYPE (array
));
134 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
135 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
136 build_int_cst (size_type_node
, n
),
137 NULL_TREE
, NULL_TREE
);
139 new_stmt
= gimple_build_assign (vect
, array_ref
);
140 vect_name
= make_ssa_name (vect
, new_stmt
);
141 gimple_assign_set_lhs (new_stmt
, vect_name
);
142 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
152 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
153 tree array
, unsigned HOST_WIDE_INT n
)
158 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
159 build_int_cst (size_type_node
, n
),
160 NULL_TREE
, NULL_TREE
);
162 new_stmt
= gimple_build_assign (array_ref
, vect
);
163 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
171 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
175 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
181 /* Add a clobber of variable VAR to the vectorization of STMT.
182 Emit the clobber before *GSI. */
185 vect_clobber_variable (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree var
)
187 tree clobber
= build_clobber (TREE_TYPE (var
));
188 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
189 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
192 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
194 /* Function vect_mark_relevant.
196 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
199 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
200 enum vect_relevant relevant
, bool live_p
)
202 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
203 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
204 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
205 gimple
*pattern_stmt
;
207 if (dump_enabled_p ())
209 dump_printf_loc (MSG_NOTE
, vect_location
,
210 "mark relevant %d, live %d: ", relevant
, live_p
);
211 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
227 if (dump_enabled_p ())
228 dump_printf_loc (MSG_NOTE
, vect_location
,
229 "last stmt in pattern. don't mark"
230 " relevant/live.\n");
231 stmt_info
= vinfo_for_stmt (pattern_stmt
);
232 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
233 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
234 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
238 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
239 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
240 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
242 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
243 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE
, vect_location
,
247 "already marked relevant/live.\n");
251 worklist
->safe_push (stmt
);
255 /* Function is_simple_and_all_uses_invariant
257 Return true if STMT is simple and all uses of it are invariant. */
260 is_simple_and_all_uses_invariant (gimple
*stmt
, loop_vec_info loop_vinfo
)
265 if (!is_gimple_assign (stmt
))
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt
)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
314 != loop_exit_ctrl_vec_info_type
)
315 *relevant
= vect_used_in_scope
;
317 /* changing memory. */
318 if (gimple_code (stmt
) != GIMPLE_PHI
)
319 if (gimple_vdef (stmt
)
320 && !gimple_clobber_p (stmt
))
322 if (dump_enabled_p ())
323 dump_printf_loc (MSG_NOTE
, vect_location
,
324 "vec_stmt_relevant_p: stmt has vdefs.\n");
325 *relevant
= vect_used_in_scope
;
328 /* uses outside the loop. */
329 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
331 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
333 basic_block bb
= gimple_bb (USE_STMT (use_p
));
334 if (!flow_bb_inside_loop_p (loop
, bb
))
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE
, vect_location
,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 if (is_gimple_debug (USE_STMT (use_p
)))
343 /* We expect all such uses to be in the loop exit phis
344 (because of loop closed form) */
345 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
346 gcc_assert (bb
== single_exit (loop
)->dest
);
353 if (*live_p
&& *relevant
== vect_unused_in_scope
354 && !is_simple_and_all_uses_invariant (stmt
, loop_vinfo
))
356 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE
, vect_location
,
358 "vec_stmt_relevant_p: stmt live but not relevant.\n");
359 *relevant
= vect_used_only_live
;
362 return (*live_p
|| *relevant
);
366 /* Function exist_non_indexing_operands_for_use_p
368 USE is one of the uses attached to STMT. Check if USE is
369 used in STMT for anything other than indexing an array. */
372 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
375 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
377 /* USE corresponds to some operand in STMT. If there is no data
378 reference in STMT, then any operand that corresponds to USE
379 is not indexing an array. */
380 if (!STMT_VINFO_DATA_REF (stmt_info
))
383 /* STMT has a data_ref. FORNOW this means that its of one of
387 (This should have been verified in analyze_data_refs).
389 'var' in the second case corresponds to a def, not a use,
390 so USE cannot correspond to any operands that are not used
393 Therefore, all we need to check is if STMT falls into the
394 first case, and whether var corresponds to USE. */
396 if (!gimple_assign_copy_p (stmt
))
398 if (is_gimple_call (stmt
)
399 && gimple_call_internal_p (stmt
))
401 internal_fn ifn
= gimple_call_internal_fn (stmt
);
402 int mask_index
= internal_fn_mask_index (ifn
);
404 && use
== gimple_call_arg (stmt
, mask_index
))
406 int stored_value_index
= internal_fn_stored_value_index (ifn
);
407 if (stored_value_index
>= 0
408 && use
== gimple_call_arg (stmt
, stored_value_index
))
410 if (internal_gather_scatter_fn_p (ifn
)
411 && use
== gimple_call_arg (stmt
, 1))
417 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
419 operand
= gimple_assign_rhs1 (stmt
);
420 if (TREE_CODE (operand
) != SSA_NAME
)
431 Function process_use.
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT cause it had already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
458 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
,
459 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
462 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
463 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
464 stmt_vec_info dstmt_vinfo
;
465 basic_block bb
, def_bb
;
467 enum vect_def_type dt
;
469 /* case 1: we are only interested in uses that need to be vectorized. Uses
470 that are used for address computation are not considered relevant. */
471 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
474 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &def_stmt
))
476 if (dump_enabled_p ())
477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
478 "not vectorized: unsupported use in stmt.\n");
482 if (!def_stmt
|| gimple_nop_p (def_stmt
))
485 def_bb
= gimple_bb (def_stmt
);
486 if (!flow_bb_inside_loop_p (loop
, def_bb
))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
493 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
494 DEF_STMT must have already been processed, because this should be the
495 only way that STMT, which is a reduction-phi, was put in the worklist,
496 as there should be no other uses for DEF_STMT in the loop. So we just
497 check that everything is as expected, and we are done. */
498 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
499 bb
= gimple_bb (stmt
);
500 if (gimple_code (stmt
) == GIMPLE_PHI
501 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
502 && gimple_code (def_stmt
) != GIMPLE_PHI
503 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
504 && bb
->loop_father
== def_bb
->loop_father
)
506 if (dump_enabled_p ())
507 dump_printf_loc (MSG_NOTE
, vect_location
,
508 "reduc-stmt defining reduc-phi in the same nest.\n");
509 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
510 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
511 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
515 /* case 3a: outer-loop stmt defining an inner-loop stmt:
516 outer-loop-header-bb:
522 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
524 if (dump_enabled_p ())
525 dump_printf_loc (MSG_NOTE
, vect_location
,
526 "outer-loop def-stmt defining inner-loop stmt.\n");
530 case vect_unused_in_scope
:
531 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
532 vect_used_in_scope
: vect_unused_in_scope
;
535 case vect_used_in_outer_by_reduction
:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
537 relevant
= vect_used_by_reduction
;
540 case vect_used_in_outer
:
541 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
542 relevant
= vect_used_in_scope
;
545 case vect_used_in_scope
:
553 /* case 3b: inner-loop stmt defining an outer-loop stmt:
554 outer-loop-header-bb:
558 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
560 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
562 if (dump_enabled_p ())
563 dump_printf_loc (MSG_NOTE
, vect_location
,
564 "inner-loop def-stmt defining outer-loop stmt.\n");
568 case vect_unused_in_scope
:
569 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
570 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
571 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
574 case vect_used_by_reduction
:
575 case vect_used_only_live
:
576 relevant
= vect_used_in_outer_by_reduction
;
579 case vect_used_in_scope
:
580 relevant
= vect_used_in_outer
;
587 /* We are also not interested in uses on loop PHI backedges that are
588 inductions. Otherwise we'll needlessly vectorize the IV increment
589 and cause hybrid SLP for SLP inductions. Unless the PHI is live
591 else if (gimple_code (stmt
) == GIMPLE_PHI
592 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
593 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
594 && (PHI_ARG_DEF_FROM_EDGE (stmt
, loop_latch_edge (bb
->loop_father
))
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE
, vect_location
,
599 "induction value on backedge.\n");
604 vect_mark_relevant (worklist
, def_stmt
, relevant
, false);
609 /* Function vect_mark_stmts_to_be_vectorized.
611 Not all stmts in the loop need to be vectorized. For example:
620 Stmt 1 and 3 do not need to be vectorized, because loop control and
621 addressing of vectorized data-refs are handled differently.
623 This pass detects such stmts. */
626 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
628 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
629 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
630 unsigned int nbbs
= loop
->num_nodes
;
631 gimple_stmt_iterator si
;
634 stmt_vec_info stmt_vinfo
;
638 enum vect_relevant relevant
;
640 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
642 auto_vec
<gimple
*, 64> worklist
;
644 /* 1. Init worklist. */
645 for (i
= 0; i
< nbbs
; i
++)
648 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
654 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
657 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
658 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
);
660 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
662 stmt
= gsi_stmt (si
);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
669 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
670 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
);
674 /* 2. Process_worklist */
675 while (worklist
.length () > 0)
680 stmt
= worklist
.pop ();
681 if (dump_enabled_p ())
683 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
684 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
687 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
688 (DEF_STMT) as relevant/irrelevant according to the relevance property
690 stmt_vinfo
= vinfo_for_stmt (stmt
);
691 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
693 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
694 propagated as is to the DEF_STMTs of its USEs.
696 One exception is when STMT has been identified as defining a reduction
697 variable; in this case we set the relevance to vect_used_by_reduction.
698 This is because we distinguish between two kinds of relevant stmts -
699 those that are used by a reduction computation, and those that are
700 (also) used by a regular computation. This allows us later on to
701 identify stmts that are used solely by a reduction, and therefore the
702 order of the results that they produce does not have to be kept. */
704 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
706 case vect_reduction_def
:
707 gcc_assert (relevant
!= vect_unused_in_scope
);
708 if (relevant
!= vect_unused_in_scope
709 && relevant
!= vect_used_in_scope
710 && relevant
!= vect_used_by_reduction
711 && relevant
!= vect_used_only_live
)
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
715 "unsupported use of reduction.\n");
720 case vect_nested_cycle
:
721 if (relevant
!= vect_unused_in_scope
722 && relevant
!= vect_used_in_outer_by_reduction
723 && relevant
!= vect_used_in_outer
)
725 if (dump_enabled_p ())
726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
727 "unsupported use of nested cycle.\n");
733 case vect_double_reduction_def
:
734 if (relevant
!= vect_unused_in_scope
735 && relevant
!= vect_used_by_reduction
736 && relevant
!= vect_used_only_live
)
738 if (dump_enabled_p ())
739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
740 "unsupported use of double reduction.\n");
750 if (is_pattern_stmt_p (stmt_vinfo
))
752 /* Pattern statements are not inserted into the code, so
753 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
754 have to scan the RHS or function arguments instead. */
755 if (is_gimple_assign (stmt
))
757 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
758 tree op
= gimple_assign_rhs1 (stmt
);
761 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
763 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
764 relevant
, &worklist
, false)
765 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
766 relevant
, &worklist
, false))
770 for (; i
< gimple_num_ops (stmt
); i
++)
772 op
= gimple_op (stmt
, i
);
773 if (TREE_CODE (op
) == SSA_NAME
774 && !process_use (stmt
, op
, loop_vinfo
, relevant
,
779 else if (is_gimple_call (stmt
))
781 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
783 tree arg
= gimple_call_arg (stmt
, i
);
784 if (!process_use (stmt
, arg
, loop_vinfo
, relevant
,
791 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
793 tree op
= USE_FROM_PTR (use_p
);
794 if (!process_use (stmt
, op
, loop_vinfo
, relevant
,
799 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
801 gather_scatter_info gs_info
;
802 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, &gs_info
))
804 if (!process_use (stmt
, gs_info
.offset
, loop_vinfo
, relevant
,
808 } /* while worklist */
813 /* Compute the prologue cost for invariant or constant operands. */
816 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
817 unsigned opno
, enum vect_def_type dt
,
818 stmt_vector_for_cost
*cost_vec
)
820 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
821 tree op
= gimple_op (stmt
, opno
);
822 unsigned prologue_cost
= 0;
824 /* Without looking at the actual initializer a vector of
825 constants can be implemented as load from the constant pool.
826 When all elements are the same we can use a splat. */
827 tree vectype
= get_vectype_for_scalar_type (TREE_TYPE (op
));
828 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
829 unsigned num_vects_to_check
;
830 unsigned HOST_WIDE_INT const_nunits
;
832 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
833 && ! multiple_p (const_nunits
, group_size
))
835 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
836 nelt_limit
= const_nunits
;
840 /* If either the vector has variable length or the vectors
841 are composed of repeated whole groups we only need to
842 cost construction once. All vectors will be the same. */
843 num_vects_to_check
= 1;
844 nelt_limit
= group_size
;
846 tree elt
= NULL_TREE
;
848 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
850 unsigned si
= j
% group_size
;
852 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
], opno
);
853 /* ??? We're just tracking whether all operands of a single
854 vector initializer are the same, ideally we'd check if
855 we emitted the same one already. */
856 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
],
860 if (nelt
== nelt_limit
)
862 /* ??? We need to pass down stmt_info for a vector type
863 even if it points to the wrong stmt. */
864 prologue_cost
+= record_stmt_cost
866 dt
== vect_external_def
867 ? (elt
? scalar_to_vec
: vec_construct
)
869 stmt_info
, 0, vect_prologue
);
874 return prologue_cost
;
877 /* Function vect_model_simple_cost.
879 Models cost for simple operations, i.e. those that only emit ncopies of a
880 single op. Right now, this does not account for multiple insns that could
881 be generated for the single vector op. We will handle that shortly. */
884 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
885 enum vect_def_type
*dt
,
888 stmt_vector_for_cost
*cost_vec
)
890 int inside_cost
= 0, prologue_cost
= 0;
892 gcc_assert (cost_vec
!= NULL
);
894 /* ??? Somehow we need to fix this at the callers. */
896 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
900 /* Scan operands and account for prologue cost of constants/externals.
901 ??? This over-estimates cost for multiple uses and should be
903 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
904 tree lhs
= gimple_get_lhs (stmt
);
905 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
907 tree op
= gimple_op (stmt
, i
);
908 enum vect_def_type dt
;
909 if (!op
|| op
== lhs
)
911 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
912 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
913 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
918 /* Cost the "broadcast" of a scalar operand in to a vector operand.
919 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
921 for (int i
= 0; i
< ndts
; i
++)
922 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
923 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
924 stmt_info
, 0, vect_prologue
);
926 /* Adjust for two-operator SLP nodes. */
927 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
930 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
931 stmt_info
, 0, vect_body
);
934 /* Pass the inside-of-loop statements to the target-specific cost model. */
935 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
936 stmt_info
, 0, vect_body
);
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_NOTE
, vect_location
,
940 "vect_model_simple_cost: inside_cost = %d, "
941 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
945 /* Model cost for type demotion and promotion operations. PWR is normally
946 zero for single-step promotions and demotions. It will be one if
947 two-step promotion/demotion is required, and so on. Each additional
948 step doubles the number of instructions required. */
951 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
952 enum vect_def_type
*dt
, int pwr
,
953 stmt_vector_for_cost
*cost_vec
)
956 int inside_cost
= 0, prologue_cost
= 0;
958 for (i
= 0; i
< pwr
+ 1; i
++)
960 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
962 inside_cost
+= record_stmt_cost (cost_vec
, vect_pow2 (tmp
),
963 vec_promote_demote
, stmt_info
, 0,
967 /* FORNOW: Assuming maximum 2 args per stmts. */
968 for (i
= 0; i
< 2; i
++)
969 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
970 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
971 stmt_info
, 0, vect_prologue
);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE
, vect_location
,
975 "vect_model_promotion_demotion_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
979 /* Function vect_model_store_cost
981 Models cost for stores. In the case of grouped accesses, one access
982 has the overhead of the grouped access attributed to it. */
985 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
986 enum vect_def_type dt
,
987 vect_memory_access_type memory_access_type
,
988 vec_load_store_type vls_type
, slp_tree slp_node
,
989 stmt_vector_for_cost
*cost_vec
)
991 unsigned int inside_cost
= 0, prologue_cost
= 0;
992 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
993 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
995 /* ??? Somehow we need to fix this at the callers. */
997 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
999 if (vls_type
== VLS_STORE_INVARIANT
)
1002 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
1005 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
1006 stmt_info
, 0, vect_prologue
);
1009 /* Grouped stores update all elements in the group at once,
1010 so we want the DR for the first statement. */
1011 if (!slp_node
&& grouped_access_p
)
1012 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1014 /* True if we should include any once-per-group costs as well as
1015 the cost of the statement itself. For SLP we only get called
1016 once per group anyhow. */
1017 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1019 /* We assume that the cost of a single store-lanes instruction is
1020 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1021 access is instead being provided by a permute-and-store operation,
1022 include the cost of the permutes. */
1024 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1026 /* Uses a high and low interleave or shuffle operations for each
1028 int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1029 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1030 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1031 stmt_info
, 0, vect_body
);
1033 if (dump_enabled_p ())
1034 dump_printf_loc (MSG_NOTE
, vect_location
,
1035 "vect_model_store_cost: strided group_size = %d .\n",
1039 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1040 /* Costs of the stores. */
1041 if (memory_access_type
== VMAT_ELEMENTWISE
1042 || memory_access_type
== VMAT_GATHER_SCATTER
)
1044 /* N scalar stores plus extracting the elements. */
1045 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1046 inside_cost
+= record_stmt_cost (cost_vec
,
1047 ncopies
* assumed_nunits
,
1048 scalar_store
, stmt_info
, 0, vect_body
);
1051 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1053 if (memory_access_type
== VMAT_ELEMENTWISE
1054 || memory_access_type
== VMAT_STRIDED_SLP
)
1056 /* N scalar stores plus extracting the elements. */
1057 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1058 inside_cost
+= record_stmt_cost (cost_vec
,
1059 ncopies
* assumed_nunits
,
1060 vec_to_scalar
, stmt_info
, 0, vect_body
);
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE
, vect_location
,
1065 "vect_model_store_cost: inside_cost = %d, "
1066 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1070 /* Calculate cost of DR's memory access. */
1072 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1073 unsigned int *inside_cost
,
1074 stmt_vector_for_cost
*body_cost_vec
)
1076 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1077 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1079 switch (alignment_support_scheme
)
1083 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1084 vector_store
, stmt_info
, 0,
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE
, vect_location
,
1089 "vect_model_store_cost: aligned.\n");
1093 case dr_unaligned_supported
:
1095 /* Here, we assign an additional cost for the unaligned store. */
1096 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1097 unaligned_store
, stmt_info
,
1098 DR_MISALIGNMENT (dr
), vect_body
);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE
, vect_location
,
1101 "vect_model_store_cost: unaligned supported by "
1106 case dr_unaligned_unsupported
:
1108 *inside_cost
= VECT_MAX_COST
;
1110 if (dump_enabled_p ())
1111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1112 "vect_model_store_cost: unsupported access.\n");
1122 /* Function vect_model_load_cost
1124 Models cost for loads. In the case of grouped accesses, one access has
1125 the overhead of the grouped access attributed to it. Since unaligned
1126 accesses are supported for loads, we also account for the costs of the
1127 access scheme chosen. */
1130 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1131 vect_memory_access_type memory_access_type
,
1132 slp_instance instance
,
1134 stmt_vector_for_cost
*cost_vec
)
1136 gimple
*first_stmt
= STMT_VINFO_STMT (stmt_info
);
1137 unsigned int inside_cost
= 0, prologue_cost
= 0;
1138 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1140 gcc_assert (cost_vec
);
1142 /* ??? Somehow we need to fix this at the callers. */
1144 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1146 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1148 /* If the load is permuted then the alignment is determined by
1149 the first group element not by the first scalar stmt DR. */
1150 gimple
*stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1151 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1152 /* Record the cost for the permutation. */
1154 unsigned assumed_nunits
1155 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info
));
1156 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1157 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1158 slp_vf
, instance
, true,
1160 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1161 stmt_info
, 0, vect_body
);
1162 /* And adjust the number of loads performed. This handles
1163 redundancies as well as loads that are later dead. */
1164 auto_sbitmap
perm (DR_GROUP_SIZE (stmt_info
));
1165 bitmap_clear (perm
);
1166 for (unsigned i
= 0;
1167 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1168 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1170 bool load_seen
= false;
1171 for (unsigned i
= 0; i
< DR_GROUP_SIZE (stmt_info
); ++i
)
1173 if (i
% assumed_nunits
== 0)
1179 if (bitmap_bit_p (perm
, i
))
1185 <= (DR_GROUP_SIZE (stmt_info
) - DR_GROUP_GAP (stmt_info
)
1186 + assumed_nunits
- 1) / assumed_nunits
);
1189 /* Grouped loads read all elements in the group at once,
1190 so we want the DR for the first statement. */
1191 if (!slp_node
&& grouped_access_p
)
1192 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1194 /* True if we should include any once-per-group costs as well as
1195 the cost of the statement itself. For SLP we only get called
1196 once per group anyhow. */
1197 bool first_stmt_p
= (first_stmt
== STMT_VINFO_STMT (stmt_info
));
1199 /* We assume that the cost of a single load-lanes instruction is
1200 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1201 access is instead being provided by a load-and-permute operation,
1202 include the cost of the permutes. */
1204 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1206 /* Uses an even and odd extract operations or shuffle operations
1207 for each needed permute. */
1208 int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
1209 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1210 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1211 stmt_info
, 0, vect_body
);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE
, vect_location
,
1215 "vect_model_load_cost: strided group_size = %d .\n",
1219 /* The loads themselves. */
1220 if (memory_access_type
== VMAT_ELEMENTWISE
1221 || memory_access_type
== VMAT_GATHER_SCATTER
)
1223 /* N scalar loads plus gathering them into a vector. */
1224 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1225 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1226 inside_cost
+= record_stmt_cost (cost_vec
,
1227 ncopies
* assumed_nunits
,
1228 scalar_load
, stmt_info
, 0, vect_body
);
1231 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1232 &inside_cost
, &prologue_cost
,
1233 cost_vec
, cost_vec
, true);
1234 if (memory_access_type
== VMAT_ELEMENTWISE
1235 || memory_access_type
== VMAT_STRIDED_SLP
)
1236 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1237 stmt_info
, 0, vect_body
);
1239 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_NOTE
, vect_location
,
1241 "vect_model_load_cost: inside_cost = %d, "
1242 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1246 /* Calculate cost of DR's memory access. */
1248 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1249 bool add_realign_cost
, unsigned int *inside_cost
,
1250 unsigned int *prologue_cost
,
1251 stmt_vector_for_cost
*prologue_cost_vec
,
1252 stmt_vector_for_cost
*body_cost_vec
,
1253 bool record_prologue_costs
)
1255 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1256 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1258 switch (alignment_support_scheme
)
1262 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1263 stmt_info
, 0, vect_body
);
1265 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE
, vect_location
,
1267 "vect_model_load_cost: aligned.\n");
1271 case dr_unaligned_supported
:
1273 /* Here, we assign an additional cost for the unaligned load. */
1274 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1275 unaligned_load
, stmt_info
,
1276 DR_MISALIGNMENT (dr
), vect_body
);
1278 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE
, vect_location
,
1280 "vect_model_load_cost: unaligned supported by "
1285 case dr_explicit_realign
:
1287 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1288 vector_load
, stmt_info
, 0, vect_body
);
1289 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1290 vec_perm
, stmt_info
, 0, vect_body
);
1292 /* FIXME: If the misalignment remains fixed across the iterations of
1293 the containing loop, the following cost should be added to the
1295 if (targetm
.vectorize
.builtin_mask_for_load
)
1296 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1297 stmt_info
, 0, vect_body
);
1299 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE
, vect_location
,
1301 "vect_model_load_cost: explicit realign\n");
1305 case dr_explicit_realign_optimized
:
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_NOTE
, vect_location
,
1309 "vect_model_load_cost: unaligned software "
1312 /* Unaligned software pipeline has a load of an address, an initial
1313 load, and possibly a mask operation to "prime" the loop. However,
1314 if this is an access in a group of loads, which provide grouped
1315 access, then the above cost should only be considered for one
1316 access in the group. Inside the loop, there is a load op
1317 and a realignment op. */
1319 if (add_realign_cost
&& record_prologue_costs
)
1321 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1322 vector_stmt
, stmt_info
,
1324 if (targetm
.vectorize
.builtin_mask_for_load
)
1325 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1326 vector_stmt
, stmt_info
,
1330 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1331 stmt_info
, 0, vect_body
);
1332 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1333 stmt_info
, 0, vect_body
);
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE
, vect_location
,
1337 "vect_model_load_cost: explicit realign optimized"
1343 case dr_unaligned_unsupported
:
1345 *inside_cost
= VECT_MAX_COST
;
1347 if (dump_enabled_p ())
1348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1349 "vect_model_load_cost: unsupported access.\n");
1358 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1359 the loop preheader for the vectorized stmt STMT. */
1362 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1365 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1368 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1369 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1373 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1377 if (nested_in_vect_loop_p (loop
, stmt
))
1380 pe
= loop_preheader_edge (loop
);
1381 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1382 gcc_assert (!new_bb
);
1386 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1388 gimple_stmt_iterator gsi_bb_start
;
1390 gcc_assert (bb_vinfo
);
1391 bb
= BB_VINFO_BB (bb_vinfo
);
1392 gsi_bb_start
= gsi_after_labels (bb
);
1393 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1397 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_NOTE
, vect_location
,
1400 "created new init_stmt: ");
1401 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1405 /* Function vect_init_vector.
1407 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1408 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1409 vector type a vector with all elements equal to VAL is created first.
1410 Place the initialization at BSI if it is not NULL. Otherwise, place the
1411 initialization at the loop preheader.
1412 Return the DEF of INIT_STMT.
1413 It will be used in the vectorization of STMT. */
1416 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1421 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1422 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1424 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1425 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1427 /* Scalar boolean value should be transformed into
1428 all zeros or all ones value before building a vector. */
1429 if (VECTOR_BOOLEAN_TYPE_P (type
))
1431 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1432 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1434 if (CONSTANT_CLASS_P (val
))
1435 val
= integer_zerop (val
) ? false_val
: true_val
;
1438 new_temp
= make_ssa_name (TREE_TYPE (type
));
1439 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1440 val
, true_val
, false_val
);
1441 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1445 else if (CONSTANT_CLASS_P (val
))
1446 val
= fold_convert (TREE_TYPE (type
), val
);
1449 new_temp
= make_ssa_name (TREE_TYPE (type
));
1450 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1451 init_stmt
= gimple_build_assign (new_temp
,
1452 fold_build1 (VIEW_CONVERT_EXPR
,
1456 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1457 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1461 val
= build_vector_from_val (type
, val
);
1464 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1465 init_stmt
= gimple_build_assign (new_temp
, val
);
1466 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1470 /* Function vect_get_vec_def_for_operand_1.
1472 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1473 DT that will be used in the vectorized stmt. */
1476 vect_get_vec_def_for_operand_1 (gimple
*def_stmt
, enum vect_def_type dt
)
1480 stmt_vec_info def_stmt_info
= NULL
;
1484 /* operand is a constant or a loop invariant. */
1485 case vect_constant_def
:
1486 case vect_external_def
:
1487 /* Code should use vect_get_vec_def_for_operand. */
1490 /* operand is defined inside the loop. */
1491 case vect_internal_def
:
1493 /* Get the def from the vectorized stmt. */
1494 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1496 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1497 /* Get vectorized pattern statement. */
1499 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1500 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1501 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1502 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1503 gcc_assert (vec_stmt
);
1504 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1505 vec_oprnd
= PHI_RESULT (vec_stmt
);
1506 else if (is_gimple_call (vec_stmt
))
1507 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1509 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1513 /* operand is defined by a loop header phi. */
1514 case vect_reduction_def
:
1515 case vect_double_reduction_def
:
1516 case vect_nested_cycle
:
1517 case vect_induction_def
:
1519 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1521 /* Get the def from the vectorized stmt. */
1522 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1523 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1524 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1525 vec_oprnd
= PHI_RESULT (vec_stmt
);
1527 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1537 /* Function vect_get_vec_def_for_operand.
1539 OP is an operand in STMT. This function returns a (vector) def that will be
1540 used in the vectorized stmt for STMT.
1542 In the case that OP is an SSA_NAME which is defined in the loop, then
1543 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1545 In case OP is an invariant or constant, a new stmt that creates a vector def
1546 needs to be introduced. VECTYPE may be used to specify a required type for
1547 vector invariant. */
1550 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1553 enum vect_def_type dt
;
1555 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1556 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1558 if (dump_enabled_p ())
1560 dump_printf_loc (MSG_NOTE
, vect_location
,
1561 "vect_get_vec_def_for_operand: ");
1562 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1563 dump_printf (MSG_NOTE
, "\n");
1566 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
, &def_stmt
);
1567 gcc_assert (is_simple_use
);
1568 if (def_stmt
&& dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1571 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1574 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1576 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1580 vector_type
= vectype
;
1581 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1582 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1583 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1585 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1587 gcc_assert (vector_type
);
1588 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1591 return vect_get_vec_def_for_operand_1 (def_stmt
, dt
);
1595 /* Function vect_get_vec_def_for_stmt_copy
1597 Return a vector-def for an operand. This function is used when the
1598 vectorized stmt to be created (by the caller to this function) is a "copy"
1599 created in case the vectorized result cannot fit in one vector, and several
1600 copies of the vector-stmt are required. In this case the vector-def is
1601 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1602 of the stmt that defines VEC_OPRND.
1603 DT is the type of the vector def VEC_OPRND.
1606 In case the vectorization factor (VF) is bigger than the number
1607 of elements that can fit in a vectype (nunits), we have to generate
1608 more than one vector stmt to vectorize the scalar stmt. This situation
1609 arises when there are multiple data-types operated upon in the loop; the
1610 smallest data-type determines the VF, and as a result, when vectorizing
1611 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1612 vector stmt (each computing a vector of 'nunits' results, and together
1613 computing 'VF' results in each iteration). This function is called when
1614 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1615 which VF=16 and nunits=4, so the number of copies required is 4):
1617 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1619 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1620 VS1.1: vx.1 = memref1 VS1.2
1621 VS1.2: vx.2 = memref2 VS1.3
1622 VS1.3: vx.3 = memref3
1624 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1625 VSnew.1: vz1 = vx.1 + ... VSnew.2
1626 VSnew.2: vz2 = vx.2 + ... VSnew.3
1627 VSnew.3: vz3 = vx.3 + ...
1629 The vectorization of S1 is explained in vectorizable_load.
1630 The vectorization of S2:
1631 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1632 the function 'vect_get_vec_def_for_operand' is called to
1633 get the relevant vector-def for each operand of S2. For operand x it
1634 returns the vector-def 'vx.0'.
1636 To create the remaining copies of the vector-stmt (VSnew.j), this
1637 function is called to get the relevant vector-def for each operand. It is
1638 obtained from the respective VS1.j stmt, which is recorded in the
1639 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1641 For example, to obtain the vector-def 'vx.1' in order to create the
1642 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1643 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1644 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1645 and return its def ('vx.1').
1646 Overall, to create the above sequence this function will be called 3 times:
1647 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1648 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1649 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1652 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1654 gimple
*vec_stmt_for_operand
;
1655 stmt_vec_info def_stmt_info
;
1657 /* Do nothing; can reuse same def. */
1658 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1661 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1662 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1663 gcc_assert (def_stmt_info
);
1664 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1665 gcc_assert (vec_stmt_for_operand
);
1666 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1667 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1669 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1674 /* Get vectorized definitions for the operands to create a copy of an original
1675 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1678 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1679 vec
<tree
> *vec_oprnds0
,
1680 vec
<tree
> *vec_oprnds1
)
1682 tree vec_oprnd
= vec_oprnds0
->pop ();
1684 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1685 vec_oprnds0
->quick_push (vec_oprnd
);
1687 if (vec_oprnds1
&& vec_oprnds1
->length ())
1689 vec_oprnd
= vec_oprnds1
->pop ();
1690 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1691 vec_oprnds1
->quick_push (vec_oprnd
);
1696 /* Get vectorized definitions for OP0 and OP1. */
1699 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1700 vec
<tree
> *vec_oprnds0
,
1701 vec
<tree
> *vec_oprnds1
,
1706 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1707 auto_vec
<tree
> ops (nops
);
1708 auto_vec
<vec
<tree
> > vec_defs (nops
);
1710 ops
.quick_push (op0
);
1712 ops
.quick_push (op1
);
1714 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1716 *vec_oprnds0
= vec_defs
[0];
1718 *vec_oprnds1
= vec_defs
[1];
1724 vec_oprnds0
->create (1);
1725 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1726 vec_oprnds0
->quick_push (vec_oprnd
);
1730 vec_oprnds1
->create (1);
1731 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1732 vec_oprnds1
->quick_push (vec_oprnd
);
1737 /* Helper function called by vect_finish_replace_stmt and
1738 vect_finish_stmt_generation. Set the location of the new
1739 statement and create a stmt_vec_info for it. */
1742 vect_finish_stmt_generation_1 (gimple
*stmt
, gimple
*vec_stmt
)
1744 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1745 vec_info
*vinfo
= stmt_info
->vinfo
;
1747 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1749 if (dump_enabled_p ())
1751 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1752 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1755 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1757 /* While EH edges will generally prevent vectorization, stmt might
1758 e.g. be in a must-not-throw region. Ensure newly created stmts
1759 that could throw are part of the same region. */
1760 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1761 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1762 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1765 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1766 which sets the same scalar result as STMT did. */
1769 vect_finish_replace_stmt (gimple
*stmt
, gimple
*vec_stmt
)
1771 gcc_assert (gimple_get_lhs (stmt
) == gimple_get_lhs (vec_stmt
));
1773 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1774 gsi_replace (&gsi
, vec_stmt
, false);
1776 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1779 /* Function vect_finish_stmt_generation.
1781 Insert a new stmt. */
1784 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1785 gimple_stmt_iterator
*gsi
)
1787 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1789 if (!gsi_end_p (*gsi
)
1790 && gimple_has_mem_ops (vec_stmt
))
1792 gimple
*at_stmt
= gsi_stmt (*gsi
);
1793 tree vuse
= gimple_vuse (at_stmt
);
1794 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1796 tree vdef
= gimple_vdef (at_stmt
);
1797 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1798 /* If we have an SSA vuse and insert a store, update virtual
1799 SSA form to avoid triggering the renamer. Do so only
1800 if we can easily see all uses - which is what almost always
1801 happens with the way vectorized stmts are inserted. */
1802 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1803 && ((is_gimple_assign (vec_stmt
)
1804 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1805 || (is_gimple_call (vec_stmt
)
1806 && !(gimple_call_flags (vec_stmt
)
1807 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1809 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1810 gimple_set_vdef (vec_stmt
, new_vdef
);
1811 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1815 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1816 vect_finish_stmt_generation_1 (stmt
, vec_stmt
);
1819 /* We want to vectorize a call to combined function CFN with function
1820 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1821 as the types of all inputs. Check whether this is possible using
1822 an internal function, returning its code if so or IFN_LAST if not. */
1825 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1826 tree vectype_out
, tree vectype_in
)
1829 if (internal_fn_p (cfn
))
1830 ifn
= as_internal_fn (cfn
);
1832 ifn
= associated_internal_fn (fndecl
);
1833 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1835 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1836 if (info
.vectorizable
)
1838 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1839 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1840 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1841 OPTIMIZE_FOR_SPEED
))
1849 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1850 gimple_stmt_iterator
*);
1852 /* Check whether a load or store statement in the loop described by
1853 LOOP_VINFO is possible in a fully-masked loop. This is testing
1854 whether the vectorizer pass has the appropriate support, as well as
1855 whether the target does.
1857 VLS_TYPE says whether the statement is a load or store and VECTYPE
1858 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1859 says how the load or store is going to be implemented and GROUP_SIZE
1860 is the number of load or store statements in the containing group.
1861 If the access is a gather load or scatter store, GS_INFO describes
1864 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1865 supported, otherwise record the required mask types. */
1868 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1869 vec_load_store_type vls_type
, int group_size
,
1870 vect_memory_access_type memory_access_type
,
1871 gather_scatter_info
*gs_info
)
1873 /* Invariant loads need no special support. */
1874 if (memory_access_type
== VMAT_INVARIANT
)
1877 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1878 machine_mode vecmode
= TYPE_MODE (vectype
);
1879 bool is_load
= (vls_type
== VLS_LOAD
);
1880 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1883 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1884 : !vect_store_lanes_supported (vectype
, group_size
, true))
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " load/store-lanes instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1894 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1895 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1899 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1901 internal_fn ifn
= (is_load
1902 ? IFN_MASK_GATHER_LOAD
1903 : IFN_MASK_SCATTER_STORE
);
1904 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1905 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1906 gs_info
->memory_type
,
1907 TYPE_SIGN (offset_type
),
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1912 "can't use a fully-masked loop because the"
1913 " target doesn't have an appropriate masked"
1914 " gather load or scatter store instruction.\n");
1915 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1918 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1919 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1923 if (memory_access_type
!= VMAT_CONTIGUOUS
1924 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1926 /* Element X of the data must come from iteration i * VF + X of the
1927 scalar loop. We need more work to support other mappings. */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1930 "can't use a fully-masked loop because an access"
1931 " isn't contiguous.\n");
1932 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1936 machine_mode mask_mode
;
1937 if (!(targetm
.vectorize
.get_mask_mode
1938 (GET_MODE_NUNITS (vecmode
),
1939 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1940 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1944 "can't use a fully-masked loop because the target"
1945 " doesn't have the appropriate masked load or"
1947 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1950 /* We might load more scalars than we need for permuting SLP loads.
1951 We checked in get_group_load_store_type that the extra elements
1952 don't leak into a new vector. */
1953 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1954 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1955 unsigned int nvectors
;
1956 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1957 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1962 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1963 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1964 that needs to be applied to all loads and stores in a vectorized loop.
1965 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1967 MASK_TYPE is the type of both masks. If new statements are needed,
1968 insert them before GSI. */
1971 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1972 gimple_stmt_iterator
*gsi
)
1974 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1978 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1979 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1980 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1981 vec_mask
, loop_mask
);
1982 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1986 /* Determine whether we can use a gather load or scatter store to vectorize
1987 strided load or store STMT by truncating the current offset to a smaller
1988 width. We need to be able to construct an offset vector:
1990 { 0, X, X*2, X*3, ... }
1992 without loss of precision, where X is STMT's DR_STEP.
1994 Return true if this is possible, describing the gather load or scatter
1995 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1998 vect_truncate_gather_scatter_offset (gimple
*stmt
, loop_vec_info loop_vinfo
,
2000 gather_scatter_info
*gs_info
)
2002 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2003 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2004 tree step
= DR_STEP (dr
);
2005 if (TREE_CODE (step
) != INTEGER_CST
)
2007 /* ??? Perhaps we could use range information here? */
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_NOTE
, vect_location
,
2010 "cannot truncate variable step.\n");
2014 /* Get the number of bits in an element. */
2015 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2016 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
2017 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2019 /* Set COUNT to the upper limit on the number of elements - 1.
2020 Start with the maximum vectorization factor. */
2021 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
2023 /* Try lowering COUNT to the number of scalar latch iterations. */
2024 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2025 widest_int max_iters
;
2026 if (max_loop_iterations (loop
, &max_iters
)
2027 && max_iters
< count
)
2028 count
= max_iters
.to_shwi ();
2030 /* Try scales of 1 and the element size. */
2031 int scales
[] = { 1, vect_get_scalar_dr_size (dr
) };
2032 wi::overflow_type overflow
= wi::OVF_NONE
;
2033 for (int i
= 0; i
< 2; ++i
)
2035 int scale
= scales
[i
];
2037 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
2040 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2041 in OFFSET_BITS bits. */
2042 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
2045 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2046 if (wi::min_precision (range
, sign
) > element_bits
)
2048 overflow
= wi::OVF_UNKNOWN
;
2052 /* See whether the target supports the operation. */
2053 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2054 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
2055 memory_type
, element_bits
, sign
, scale
,
2056 &gs_info
->ifn
, &gs_info
->element_type
))
2059 tree offset_type
= build_nonstandard_integer_type (element_bits
,
2062 gs_info
->decl
= NULL_TREE
;
2063 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2064 but we don't need to store that here. */
2065 gs_info
->base
= NULL_TREE
;
2066 gs_info
->offset
= fold_convert (offset_type
, step
);
2067 gs_info
->offset_dt
= vect_constant_def
;
2068 gs_info
->offset_vectype
= NULL_TREE
;
2069 gs_info
->scale
= scale
;
2070 gs_info
->memory_type
= memory_type
;
2074 if (overflow
&& dump_enabled_p ())
2075 dump_printf_loc (MSG_NOTE
, vect_location
,
2076 "truncating gather/scatter offset to %d bits"
2077 " might change its value.\n", element_bits
);
2082 /* Return true if we can use gather/scatter internal functions to
2083 vectorize STMT, which is a grouped or strided load or store.
2084 MASKED_P is true if load or store is conditional. When returning
2085 true, fill in GS_INFO with the information required to perform the
2089 vect_use_strided_gather_scatters_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
2091 gather_scatter_info
*gs_info
)
2093 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
)
2095 return vect_truncate_gather_scatter_offset (stmt
, loop_vinfo
,
2098 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
2099 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2100 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2101 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
2103 /* Enforced by vect_check_gather_scatter. */
2104 gcc_assert (element_bits
>= offset_bits
);
2106 /* If the elements are wider than the offset, convert the offset to the
2107 same width, without changing its sign. */
2108 if (element_bits
> offset_bits
)
2110 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
2111 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
2112 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_NOTE
, vect_location
,
2117 "using gather/scatter for strided/grouped access,"
2118 " scale = %d\n", gs_info
->scale
);
2123 /* STMT is a non-strided load or store, meaning that it accesses
2124 elements with a known constant step. Return -1 if that step
2125 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2128 compare_step_with_zero (gimple
*stmt
)
2130 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2131 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2132 return tree_int_cst_compare (vect_dr_behavior (dr
)->step
,
2136 /* If the target supports a permute mask that reverses the elements in
2137 a vector of type VECTYPE, return that mask, otherwise return null. */
2140 perm_mask_for_reverse (tree vectype
)
2142 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2144 /* The encoding has a single stepped pattern. */
2145 vec_perm_builder
sel (nunits
, 1, 3);
2146 for (int i
= 0; i
< 3; ++i
)
2147 sel
.quick_push (nunits
- 1 - i
);
2149 vec_perm_indices
indices (sel
, 1, nunits
);
2150 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2152 return vect_gen_perm_mask_checked (vectype
, indices
);
2155 /* STMT is either a masked or unconditional store. Return the value
2159 vect_get_store_rhs (gimple
*stmt
)
2161 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt
))
2163 gcc_assert (gimple_assign_single_p (assign
));
2164 return gimple_assign_rhs1 (assign
);
2166 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
2168 internal_fn ifn
= gimple_call_internal_fn (call
);
2169 int index
= internal_fn_stored_value_index (ifn
);
2170 gcc_assert (index
>= 0);
2171 return gimple_call_arg (stmt
, index
);
2176 /* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT is part of a grouped load
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
2186 get_group_load_store_type (gimple
*stmt
, tree vectype
, bool slp
,
2187 bool masked_p
, vec_load_store_type vls_type
,
2188 vect_memory_access_type
*memory_access_type
,
2189 gather_scatter_info
*gs_info
)
2191 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2192 vec_info
*vinfo
= stmt_info
->vinfo
;
2193 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2194 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2195 gimple
*first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2196 data_reference
*first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
2197 unsigned int group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
2198 bool single_element_p
= (stmt
== first_stmt
2199 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2200 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (vinfo_for_stmt (first_stmt
));
2201 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2203 /* True if the vectorized statements would access beyond the last
2204 statement in the group. */
2205 bool overrun_p
= false;
2207 /* True if we can cope with such overrun by peeling for gaps, so that
2208 there is at least one final scalar iteration after the vector loop. */
2209 bool can_overrun_p
= (!masked_p
2210 && vls_type
== VLS_LOAD
2214 /* There can only be a gap at the end of the group if the stride is
2215 known at compile time. */
2216 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info
) || gap
== 0);
2218 /* Stores can't yet have gaps. */
2219 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2223 if (STMT_VINFO_STRIDED_P (stmt_info
))
2225 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2226 separated by the stride, until we have a complete vector.
2227 Fall back to scalar accesses if that isn't possible. */
2228 if (multiple_p (nunits
, group_size
))
2229 *memory_access_type
= VMAT_STRIDED_SLP
;
2231 *memory_access_type
= VMAT_ELEMENTWISE
;
2235 overrun_p
= loop_vinfo
&& gap
!= 0;
2236 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2238 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2239 "Grouped store with gaps requires"
2240 " non-consecutive accesses\n");
2243 /* An overrun is fine if the trailing elements are smaller
2244 than the alignment boundary B. Every vector access will
2245 be a multiple of B and so we are guaranteed to access a
2246 non-gap element in the same B-sized block. */
2248 && gap
< (vect_known_alignment_in_bytes (first_dr
)
2249 / vect_get_scalar_dr_size (first_dr
)))
2251 if (overrun_p
&& !can_overrun_p
)
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2255 "Peeling for outer loop is not supported\n");
2258 *memory_access_type
= VMAT_CONTIGUOUS
;
2263 /* We can always handle this case using elementwise accesses,
2264 but see if something more efficient is available. */
2265 *memory_access_type
= VMAT_ELEMENTWISE
;
2267 /* If there is a gap at the end of the group then these optimizations
2268 would access excess elements in the last iteration. */
2269 bool would_overrun_p
= (gap
!= 0);
2270 /* An overrun is fine if the trailing elements are smaller than the
2271 alignment boundary B. Every vector access will be a multiple of B
2272 and so we are guaranteed to access a non-gap element in the
2273 same B-sized block. */
2276 && gap
< (vect_known_alignment_in_bytes (first_dr
)
2277 / vect_get_scalar_dr_size (first_dr
)))
2278 would_overrun_p
= false;
2280 if (!STMT_VINFO_STRIDED_P (stmt_info
)
2281 && (can_overrun_p
|| !would_overrun_p
)
2282 && compare_step_with_zero (stmt
) > 0)
2284 /* First cope with the degenerate case of a single-element
2286 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2287 *memory_access_type
= VMAT_CONTIGUOUS
;
2289 /* Otherwise try using LOAD/STORE_LANES. */
2290 if (*memory_access_type
== VMAT_ELEMENTWISE
2291 && (vls_type
== VLS_LOAD
2292 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2293 : vect_store_lanes_supported (vectype
, group_size
,
2296 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2297 overrun_p
= would_overrun_p
;
2300 /* If that fails, try using permuting loads. */
2301 if (*memory_access_type
== VMAT_ELEMENTWISE
2302 && (vls_type
== VLS_LOAD
2303 ? vect_grouped_load_supported (vectype
, single_element_p
,
2305 : vect_grouped_store_supported (vectype
, group_size
)))
2307 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2308 overrun_p
= would_overrun_p
;
2312 /* As a last resort, trying using a gather load or scatter store.
2314 ??? Although the code can handle all group sizes correctly,
2315 it probably isn't a win to use separate strided accesses based
2316 on nearby locations. Or, even if it's a win over scalar code,
2317 it might not be a win over vectorizing at a lower VF, if that
2318 allows us to use contiguous accesses. */
2319 if (*memory_access_type
== VMAT_ELEMENTWISE
2322 && vect_use_strided_gather_scatters_p (stmt
, loop_vinfo
,
2324 *memory_access_type
= VMAT_GATHER_SCATTER
;
2327 if (vls_type
!= VLS_LOAD
&& first_stmt
== stmt
)
2329 /* STMT is the leader of the group. Check the operands of all the
2330 stmts of the group. */
2331 gimple
*next_stmt
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2334 tree op
= vect_get_store_rhs (next_stmt
);
2335 enum vect_def_type dt
;
2336 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2340 "use not simple.\n");
2343 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
2349 gcc_assert (can_overrun_p
);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2352 "Data access with gaps requires scalar "
2354 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2360 /* A subroutine of get_load_store_type, with a subset of the same
2361 arguments. Handle the case where STMT is a load or store that
2362 accesses consecutive elements with a negative step. */
2364 static vect_memory_access_type
2365 get_negative_load_store_type (gimple
*stmt
, tree vectype
,
2366 vec_load_store_type vls_type
,
2367 unsigned int ncopies
)
2369 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2370 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2371 dr_alignment_support alignment_support_scheme
;
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2377 "multiple types with negative step.\n");
2378 return VMAT_ELEMENTWISE
;
2381 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
2382 if (alignment_support_scheme
!= dr_aligned
2383 && alignment_support_scheme
!= dr_unaligned_supported
)
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2387 "negative step but alignment required.\n");
2388 return VMAT_ELEMENTWISE
;
2391 if (vls_type
== VLS_STORE_INVARIANT
)
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_NOTE
, vect_location
,
2395 "negative step with invariant source;"
2396 " no permute needed.\n");
2397 return VMAT_CONTIGUOUS_DOWN
;
2400 if (!perm_mask_for_reverse (vectype
))
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2404 "negative step and reversing not supported.\n");
2405 return VMAT_ELEMENTWISE
;
2408 return VMAT_CONTIGUOUS_REVERSE
;
2411 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2412 if there is a memory access type that the vectorized form can use,
2413 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2414 or scatters, fill in GS_INFO accordingly.
2416 SLP says whether we're performing SLP rather than loop vectorization.
2417 MASKED_P is true if the statement is conditional on a vectorized mask.
2418 VECTYPE is the vector type that the vectorized statements will use.
2419 NCOPIES is the number of vector statements that will be needed. */
2422 get_load_store_type (gimple
*stmt
, tree vectype
, bool slp
, bool masked_p
,
2423 vec_load_store_type vls_type
, unsigned int ncopies
,
2424 vect_memory_access_type
*memory_access_type
,
2425 gather_scatter_info
*gs_info
)
2427 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2428 vec_info
*vinfo
= stmt_info
->vinfo
;
2429 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2430 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2431 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2433 *memory_access_type
= VMAT_GATHER_SCATTER
;
2434 if (!vect_check_gather_scatter (stmt
, loop_vinfo
, gs_info
))
2436 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2437 &gs_info
->offset_dt
,
2438 &gs_info
->offset_vectype
))
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2442 "%s index use not simple.\n",
2443 vls_type
== VLS_LOAD
? "gather" : "scatter");
2447 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2449 if (!get_group_load_store_type (stmt
, vectype
, slp
, masked_p
, vls_type
,
2450 memory_access_type
, gs_info
))
2453 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2457 && vect_use_strided_gather_scatters_p (stmt
, loop_vinfo
,
2459 *memory_access_type
= VMAT_GATHER_SCATTER
;
2461 *memory_access_type
= VMAT_ELEMENTWISE
;
2465 int cmp
= compare_step_with_zero (stmt
);
2467 *memory_access_type
= get_negative_load_store_type
2468 (stmt
, vectype
, vls_type
, ncopies
);
2471 gcc_assert (vls_type
== VLS_LOAD
);
2472 *memory_access_type
= VMAT_INVARIANT
;
2475 *memory_access_type
= VMAT_CONTIGUOUS
;
2478 if ((*memory_access_type
== VMAT_ELEMENTWISE
2479 || *memory_access_type
== VMAT_STRIDED_SLP
)
2480 && !nunits
.is_constant ())
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2484 "Not using elementwise accesses due to variable "
2485 "vectorization factor.\n");
2489 /* FIXME: At the moment the cost model seems to underestimate the
2490 cost of using elementwise accesses. This check preserves the
2491 traditional behavior until that can be fixed. */
2492 if (*memory_access_type
== VMAT_ELEMENTWISE
2493 && !STMT_VINFO_STRIDED_P (stmt_info
)
2494 && !(stmt
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2495 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2496 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2500 "not falling back to elementwise accesses\n");
2506 /* Return true if boolean argument MASK is suitable for vectorizing
2507 conditional load or store STMT. When returning true, store the type
2508 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2509 in *MASK_VECTYPE_OUT. */
2512 vect_check_load_store_mask (gimple
*stmt
, tree mask
,
2513 vect_def_type
*mask_dt_out
,
2514 tree
*mask_vectype_out
)
2516 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2518 if (dump_enabled_p ())
2519 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2520 "mask argument is not a boolean.\n");
2524 if (TREE_CODE (mask
) != SSA_NAME
)
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2528 "mask argument is not an SSA name.\n");
2532 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2533 enum vect_def_type mask_dt
;
2535 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2539 "mask use not simple.\n");
2543 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2545 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2547 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2551 "could not find an appropriate vector mask type.\n");
2555 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2556 TYPE_VECTOR_SUBPARTS (vectype
)))
2558 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2561 "vector mask type ");
2562 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, mask_vectype
);
2563 dump_printf (MSG_MISSED_OPTIMIZATION
,
2564 " does not match vector data type ");
2565 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, vectype
);
2566 dump_printf (MSG_MISSED_OPTIMIZATION
, ".\n");
2571 *mask_dt_out
= mask_dt
;
2572 *mask_vectype_out
= mask_vectype
;
2576 /* Return true if stored value RHS is suitable for vectorizing store
2577 statement STMT. When returning true, store the type of the
2578 definition in *RHS_DT_OUT, the type of the vectorized store value in
2579 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2582 vect_check_store_rhs (gimple
*stmt
, tree rhs
, vect_def_type
*rhs_dt_out
,
2583 tree
*rhs_vectype_out
, vec_load_store_type
*vls_type_out
)
2585 /* In the case this is a store from a constant make sure
2586 native_encode_expr can handle it. */
2587 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2591 "cannot encode constant as a byte sequence.\n");
2595 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2596 enum vect_def_type rhs_dt
;
2598 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2602 "use not simple.\n");
2606 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2607 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2611 "incompatible vector types.\n");
2615 *rhs_dt_out
= rhs_dt
;
2616 *rhs_vectype_out
= rhs_vectype
;
2617 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2618 *vls_type_out
= VLS_STORE_INVARIANT
;
2620 *vls_type_out
= VLS_STORE
;
2624 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2625 Note that we support masks with floating-point type, in which case the
2626 floats are interpreted as a bitmask. */
2629 vect_build_all_ones_mask (gimple
*stmt
, tree masktype
)
2631 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2632 return build_int_cst (masktype
, -1);
2633 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2635 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2636 mask
= build_vector_from_val (masktype
, mask
);
2637 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2639 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2643 for (int j
= 0; j
< 6; ++j
)
2645 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2646 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2647 mask
= build_vector_from_val (masktype
, mask
);
2648 return vect_init_vector (stmt
, mask
, masktype
, NULL
);
2653 /* Build an all-zero merge value of type VECTYPE while vectorizing
2654 STMT as a gather load. */
2657 vect_build_zero_merge_argument (gimple
*stmt
, tree vectype
)
2660 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2661 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2666 for (int j
= 0; j
< 6; ++j
)
2668 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2669 merge
= build_real (TREE_TYPE (vectype
), r
);
2673 merge
= build_vector_from_val (vectype
, merge
);
2674 return vect_init_vector (stmt
, merge
, vectype
, NULL
);
2677 /* Build a gather load call while vectorizing STMT. Insert new instructions
2678 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2679 operation. If the load is conditional, MASK is the unvectorized
2680 condition and MASK_DT is its definition type, otherwise MASK is null. */
2683 vect_build_gather_load_calls (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2684 gimple
**vec_stmt
, gather_scatter_info
*gs_info
,
2685 tree mask
, vect_def_type mask_dt
)
2687 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2688 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2689 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2690 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2691 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2692 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2693 edge pe
= loop_preheader_edge (loop
);
2694 enum { NARROW
, NONE
, WIDEN
} modifier
;
2695 poly_uint64 gather_off_nunits
2696 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2698 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2699 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2700 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2701 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2702 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2703 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2704 tree scaletype
= TREE_VALUE (arglist
);
2705 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2706 && (!mask
|| types_compatible_p (srctype
, masktype
)));
2708 tree perm_mask
= NULL_TREE
;
2709 tree mask_perm_mask
= NULL_TREE
;
2710 if (known_eq (nunits
, gather_off_nunits
))
2712 else if (known_eq (nunits
* 2, gather_off_nunits
))
2716 /* Currently widening gathers and scatters are only supported for
2717 fixed-length vectors. */
2718 int count
= gather_off_nunits
.to_constant ();
2719 vec_perm_builder
sel (count
, count
, 1);
2720 for (int i
= 0; i
< count
; ++i
)
2721 sel
.quick_push (i
| (count
/ 2));
2723 vec_perm_indices
indices (sel
, 1, count
);
2724 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2727 else if (known_eq (nunits
, gather_off_nunits
* 2))
2731 /* Currently narrowing gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count
= nunits
.to_constant ();
2734 vec_perm_builder
sel (count
, count
, 1);
2735 sel
.quick_grow (count
);
2736 for (int i
= 0; i
< count
; ++i
)
2737 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2738 vec_perm_indices
indices (sel
, 2, count
);
2739 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2745 for (int i
= 0; i
< count
; ++i
)
2746 sel
[i
] = i
| (count
/ 2);
2747 indices
.new_vector (sel
, 2, count
);
2748 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2754 tree vec_dest
= vect_create_destination_var (gimple_get_lhs (stmt
),
2757 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2758 if (!is_gimple_min_invariant (ptr
))
2761 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2762 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2763 gcc_assert (!new_bb
);
2766 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2768 tree vec_oprnd0
= NULL_TREE
;
2769 tree vec_mask
= NULL_TREE
;
2770 tree src_op
= NULL_TREE
;
2771 tree mask_op
= NULL_TREE
;
2772 tree prev_res
= NULL_TREE
;
2773 stmt_vec_info prev_stmt_info
= NULL
;
2777 src_op
= vect_build_zero_merge_argument (stmt
, rettype
);
2778 mask_op
= vect_build_all_ones_mask (stmt
, masktype
);
2781 for (int j
= 0; j
< ncopies
; ++j
)
2785 if (modifier
== WIDEN
&& (j
& 1))
2786 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2787 perm_mask
, stmt
, gsi
);
2790 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt
);
2793 = vect_get_vec_def_for_stmt_copy (gs_info
->offset_dt
, vec_oprnd0
);
2795 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2798 TYPE_VECTOR_SUBPARTS (idxtype
)));
2799 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2800 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2801 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2802 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2808 if (mask_perm_mask
&& (j
& 1))
2809 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2810 mask_perm_mask
, stmt
, gsi
);
2814 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2816 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
2819 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2822 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
)),
2823 TYPE_VECTOR_SUBPARTS (masktype
)));
2824 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2825 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2826 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
,
2828 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2835 new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2838 if (!useless_type_conversion_p (vectype
, rettype
))
2840 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2841 TYPE_VECTOR_SUBPARTS (rettype
)));
2842 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2843 gimple_call_set_lhs (new_stmt
, op
);
2844 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2845 var
= make_ssa_name (vec_dest
);
2846 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2847 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2851 var
= make_ssa_name (vec_dest
, new_stmt
);
2852 gimple_call_set_lhs (new_stmt
, var
);
2855 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2857 if (modifier
== NARROW
)
2864 var
= permute_vec_elements (prev_res
, var
, perm_mask
, stmt
, gsi
);
2865 new_stmt
= SSA_NAME_DEF_STMT (var
);
2868 if (prev_stmt_info
== NULL
)
2869 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2872 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2876 /* Prepare the base and offset in GS_INFO for vectorization.
2877 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2878 to the vectorized offset argument for the first copy of STMT. STMT
2879 is the statement described by GS_INFO and LOOP is the containing loop. */
2882 vect_get_gather_scatter_ops (struct loop
*loop
, gimple
*stmt
,
2883 gather_scatter_info
*gs_info
,
2884 tree
*dataref_ptr
, tree
*vec_offset
)
2886 gimple_seq stmts
= NULL
;
2887 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2891 edge pe
= loop_preheader_edge (loop
);
2892 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2893 gcc_assert (!new_bb
);
2895 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2896 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2897 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt
,
2901 /* Prepare to implement a grouped or strided load or store using
2902 the gather load or scatter store operation described by GS_INFO.
2903 STMT is the load or store statement.
2905 Set *DATAREF_BUMP to the amount that should be added to the base
2906 address after each copy of the vectorized statement. Set *VEC_OFFSET
2907 to an invariant offset vector in which element I has the value
2908 I * DR_STEP / SCALE. */
2911 vect_get_strided_load_store_ops (gimple
*stmt
, loop_vec_info loop_vinfo
,
2912 gather_scatter_info
*gs_info
,
2913 tree
*dataref_bump
, tree
*vec_offset
)
2915 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2916 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2917 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2918 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2921 tree bump
= size_binop (MULT_EXPR
,
2922 fold_convert (sizetype
, DR_STEP (dr
)),
2923 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2924 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2926 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2928 /* The offset given in GS_INFO can have pointer type, so use the element
2929 type of the vector instead. */
2930 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2931 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2932 offset_type
= TREE_TYPE (offset_vectype
);
2934 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2935 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
2936 ssize_int (gs_info
->scale
));
2937 step
= fold_convert (offset_type
, step
);
2938 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2940 /* Create {0, X, X*2, X*3, ...}. */
2941 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
2942 build_zero_cst (offset_type
), step
);
2944 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2947 /* Return the amount that should be added to a vector pointer to move
2948 to the next or previous copy of AGGR_TYPE. DR is the data reference
2949 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2953 vect_get_data_ptr_increment (data_reference
*dr
, tree aggr_type
,
2954 vect_memory_access_type memory_access_type
)
2956 if (memory_access_type
== VMAT_INVARIANT
)
2957 return size_zero_node
;
2959 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2960 tree step
= vect_dr_behavior (dr
)->step
;
2961 if (tree_int_cst_sgn (step
) == -1)
2962 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2966 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2969 vectorizable_bswap (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2970 gimple
**vec_stmt
, slp_tree slp_node
,
2971 tree vectype_in
, enum vect_def_type
*dt
,
2972 stmt_vector_for_cost
*cost_vec
)
2975 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2976 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2978 unsigned HOST_WIDE_INT nunits
, num_bytes
;
2980 op
= gimple_call_arg (stmt
, 0);
2981 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2983 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
2986 /* Multiple types in SLP are handled by creating the appropriate number of
2987 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2992 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2994 gcc_assert (ncopies
>= 1);
2996 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3000 if (!TYPE_VECTOR_SUBPARTS (char_vectype
).is_constant (&num_bytes
))
3003 unsigned word_bytes
= num_bytes
/ nunits
;
3005 /* The encoding uses one stepped pattern for each byte in the word. */
3006 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3007 for (unsigned i
= 0; i
< 3; ++i
)
3008 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3009 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
3011 vec_perm_indices
indices (elts
, 1, num_bytes
);
3012 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
3017 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3018 DUMP_VECT_SCOPE ("vectorizable_bswap");
3021 record_stmt_cost (cost_vec
,
3022 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3023 record_stmt_cost (cost_vec
,
3024 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
3029 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3032 vec
<tree
> vec_oprnds
= vNULL
;
3033 gimple
*new_stmt
= NULL
;
3034 stmt_vec_info prev_stmt_info
= NULL
;
3035 for (unsigned j
= 0; j
< ncopies
; j
++)
3039 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
3041 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
3043 /* Arguments are ready. create the new vector stmt. */
3046 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3048 tree tem
= make_ssa_name (char_vectype
);
3049 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3050 char_vectype
, vop
));
3051 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3052 tree tem2
= make_ssa_name (char_vectype
);
3053 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3054 tem
, tem
, bswap_vconst
);
3055 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3056 tem
= make_ssa_name (vectype
);
3057 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3059 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3061 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3068 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3070 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3072 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3075 vec_oprnds
.release ();
3079 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3080 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3081 in a single step. On success, store the binary pack code in
3085 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3086 tree_code
*convert_code
)
3088 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3089 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3093 int multi_step_cvt
= 0;
3094 auto_vec
<tree
, 8> interm_types
;
3095 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3096 &code
, &multi_step_cvt
,
3101 *convert_code
= code
;
3105 /* Function vectorizable_call.
3107 Check if GS performs a function call that can be vectorized.
3108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3109 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3113 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
3114 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
3120 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3121 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
3122 tree vectype_out
, vectype_in
;
3123 poly_uint64 nunits_in
;
3124 poly_uint64 nunits_out
;
3125 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3126 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3127 vec_info
*vinfo
= stmt_info
->vinfo
;
3128 tree fndecl
, new_temp
, rhs_type
;
3129 enum vect_def_type dt
[3]
3130 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3132 gimple
*new_stmt
= NULL
;
3134 vec
<tree
> vargs
= vNULL
;
3135 enum { NARROW
, NONE
, WIDEN
} modifier
;
3139 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3142 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3146 /* Is GS a vectorizable call? */
3147 stmt
= dyn_cast
<gcall
*> (gs
);
3151 if (gimple_call_internal_p (stmt
)
3152 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3153 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3154 /* Handled by vectorizable_load and vectorizable_store. */
3157 if (gimple_call_lhs (stmt
) == NULL_TREE
3158 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3161 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3163 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3165 /* Process function arguments. */
3166 rhs_type
= NULL_TREE
;
3167 vectype_in
= NULL_TREE
;
3168 nargs
= gimple_call_num_args (stmt
);
3170 /* Bail out if the function has more than three arguments, we do not have
3171 interesting builtin functions to vectorize with more than two arguments
3172 except for fma. No arguments is also not good. */
3173 if (nargs
== 0 || nargs
> 3)
3176 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3177 if (gimple_call_internal_p (stmt
)
3178 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
3181 rhs_type
= unsigned_type_node
;
3184 for (i
= 0; i
< nargs
; i
++)
3188 op
= gimple_call_arg (stmt
, i
);
3190 /* We can only handle calls with arguments of the same type. */
3192 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3194 if (dump_enabled_p ())
3195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3196 "argument types differ.\n");
3200 rhs_type
= TREE_TYPE (op
);
3202 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &opvectype
))
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3206 "use not simple.\n");
3211 vectype_in
= opvectype
;
3213 && opvectype
!= vectype_in
)
3215 if (dump_enabled_p ())
3216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3217 "argument vector types differ.\n");
3221 /* If all arguments are external or constant defs use a vector type with
3222 the same size as the output vector type. */
3224 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3226 gcc_assert (vectype_in
);
3229 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3232 "no vectype for scalar type ");
3233 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3234 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3241 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3242 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3243 if (known_eq (nunits_in
* 2, nunits_out
))
3245 else if (known_eq (nunits_out
, nunits_in
))
3247 else if (known_eq (nunits_out
* 2, nunits_in
))
3252 /* We only handle functions that do not read or clobber memory. */
3253 if (gimple_vuse (stmt
))
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3257 "function reads from or writes to memory.\n");
3261 /* For now, we only vectorize functions if a target specific builtin
3262 is available. TODO -- in some cases, it might be profitable to
3263 insert the calls for pieces of the vector, in order to be able
3264 to vectorize other operations in the loop. */
3266 internal_fn ifn
= IFN_LAST
;
3267 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3268 tree callee
= gimple_call_fndecl (stmt
);
3270 /* First try using an internal function. */
3271 tree_code convert_code
= ERROR_MARK
;
3273 && (modifier
== NONE
3274 || (modifier
== NARROW
3275 && simple_integer_narrowing (vectype_out
, vectype_in
,
3277 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3280 /* If that fails, try asking for a target-specific built-in function. */
3281 if (ifn
== IFN_LAST
)
3283 if (cfn
!= CFN_LAST
)
3284 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3285 (cfn
, vectype_out
, vectype_in
);
3287 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3288 (callee
, vectype_out
, vectype_in
);
3291 if (ifn
== IFN_LAST
&& !fndecl
)
3293 if (cfn
== CFN_GOMP_SIMD_LANE
3296 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3297 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3298 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3299 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3301 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3302 { 0, 1, 2, ... vf - 1 } vector. */
3303 gcc_assert (nargs
== 0);
3305 else if (modifier
== NONE
3306 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3307 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3308 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3309 return vectorizable_bswap (stmt
, gsi
, vec_stmt
, slp_node
,
3310 vectype_in
, dt
, cost_vec
);
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3315 "function is not vectorizable.\n");
3322 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3323 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3325 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3327 /* Sanity check: make sure that at least one copy of the vectorized stmt
3328 needs to be generated. */
3329 gcc_assert (ncopies
>= 1);
3331 if (!vec_stmt
) /* transformation not required. */
3333 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3334 DUMP_VECT_SCOPE ("vectorizable_call");
3335 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3336 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3337 record_stmt_cost (cost_vec
, ncopies
/ 2,
3338 vec_promote_demote
, stmt_info
, 0, vect_body
);
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3349 scalar_dest
= gimple_call_lhs (stmt
);
3350 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3352 prev_stmt_info
= NULL
;
3353 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3355 tree prev_res
= NULL_TREE
;
3356 for (j
= 0; j
< ncopies
; ++j
)
3358 /* Build argument list for the vectorized call. */
3360 vargs
.create (nargs
);
3366 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3367 vec
<tree
> vec_oprnds0
;
3369 for (i
= 0; i
< nargs
; i
++)
3370 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3371 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3372 vec_oprnds0
= vec_defs
[0];
3374 /* Arguments are ready. Create the new vector stmt. */
3375 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3378 for (k
= 0; k
< nargs
; k
++)
3380 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3381 vargs
[k
] = vec_oprndsk
[i
];
3383 if (modifier
== NARROW
)
3385 tree half_res
= make_ssa_name (vectype_in
);
3387 = gimple_build_call_internal_vec (ifn
, vargs
);
3388 gimple_call_set_lhs (call
, half_res
);
3389 gimple_call_set_nothrow (call
, true);
3391 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3394 prev_res
= half_res
;
3397 new_temp
= make_ssa_name (vec_dest
);
3398 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3399 prev_res
, half_res
);
3404 if (ifn
!= IFN_LAST
)
3405 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3407 call
= gimple_build_call_vec (fndecl
, vargs
);
3408 new_temp
= make_ssa_name (vec_dest
, call
);
3409 gimple_call_set_lhs (call
, new_temp
);
3410 gimple_call_set_nothrow (call
, true);
3413 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3414 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3417 for (i
= 0; i
< nargs
; i
++)
3419 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3420 vec_oprndsi
.release ();
3425 for (i
= 0; i
< nargs
; i
++)
3427 op
= gimple_call_arg (stmt
, i
);
3430 = vect_get_vec_def_for_operand (op
, stmt
);
3433 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
3435 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3438 vargs
.quick_push (vec_oprnd0
);
3441 if (gimple_call_internal_p (stmt
)
3442 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
3444 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3446 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3447 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3448 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
3449 new_temp
= make_ssa_name (vec_dest
);
3450 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3452 else if (modifier
== NARROW
)
3454 tree half_res
= make_ssa_name (vectype_in
);
3455 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3456 gimple_call_set_lhs (call
, half_res
);
3457 gimple_call_set_nothrow (call
, true);
3459 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3462 prev_res
= half_res
;
3465 new_temp
= make_ssa_name (vec_dest
);
3466 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3467 prev_res
, half_res
);
3472 if (ifn
!= IFN_LAST
)
3473 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3475 call
= gimple_build_call_vec (fndecl
, vargs
);
3476 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3477 gimple_call_set_lhs (call
, new_temp
);
3478 gimple_call_set_nothrow (call
, true);
3481 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3483 if (j
== (modifier
== NARROW
? 1 : 0))
3484 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3486 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3488 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3491 else if (modifier
== NARROW
)
3493 for (j
= 0; j
< ncopies
; ++j
)
3495 /* Build argument list for the vectorized call. */
3497 vargs
.create (nargs
* 2);
3503 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3504 vec
<tree
> vec_oprnds0
;
3506 for (i
= 0; i
< nargs
; i
++)
3507 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3508 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3509 vec_oprnds0
= vec_defs
[0];
3511 /* Arguments are ready. Create the new vector stmt. */
3512 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3516 for (k
= 0; k
< nargs
; k
++)
3518 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3519 vargs
.quick_push (vec_oprndsk
[i
]);
3520 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3523 if (ifn
!= IFN_LAST
)
3524 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3526 call
= gimple_build_call_vec (fndecl
, vargs
);
3527 new_temp
= make_ssa_name (vec_dest
, call
);
3528 gimple_call_set_lhs (call
, new_temp
);
3529 gimple_call_set_nothrow (call
, true);
3531 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3532 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3535 for (i
= 0; i
< nargs
; i
++)
3537 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3538 vec_oprndsi
.release ();
3543 for (i
= 0; i
< nargs
; i
++)
3545 op
= gimple_call_arg (stmt
, i
);
3549 = vect_get_vec_def_for_operand (op
, stmt
);
3551 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3555 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
3557 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
3559 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
3562 vargs
.quick_push (vec_oprnd0
);
3563 vargs
.quick_push (vec_oprnd1
);
3566 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3567 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3568 gimple_call_set_lhs (new_stmt
, new_temp
);
3569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3572 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3574 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3576 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3579 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3582 /* No current target implements this case. */
3587 /* The call in STMT might prevent it from being removed in dce.
3588 We however cannot remove it here, due to the way the ssa name
3589 it defines is mapped to the new definition. So just replace
3590 rhs of the statement with something harmless. */
3595 if (is_pattern_stmt_p (stmt_info
))
3596 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
3597 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3599 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3600 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3601 set_vinfo_for_stmt (stmt_info
->stmt
, NULL
);
3602 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3603 gsi_replace (gsi
, new_stmt
, false);
3609 struct simd_call_arg_info
3613 HOST_WIDE_INT linear_step
;
3614 enum vect_def_type dt
;
3616 bool simd_lane_linear
;
3619 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3620 is linear within simd lane (but not within whole loop), note it in
3624 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3625 struct simd_call_arg_info
*arginfo
)
3627 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3629 if (!is_gimple_assign (def_stmt
)
3630 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3631 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3634 tree base
= gimple_assign_rhs1 (def_stmt
);
3635 HOST_WIDE_INT linear_step
= 0;
3636 tree v
= gimple_assign_rhs2 (def_stmt
);
3637 while (TREE_CODE (v
) == SSA_NAME
)
3640 def_stmt
= SSA_NAME_DEF_STMT (v
);
3641 if (is_gimple_assign (def_stmt
))
3642 switch (gimple_assign_rhs_code (def_stmt
))
3645 t
= gimple_assign_rhs2 (def_stmt
);
3646 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3648 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3649 v
= gimple_assign_rhs1 (def_stmt
);
3652 t
= gimple_assign_rhs2 (def_stmt
);
3653 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3655 linear_step
= tree_to_shwi (t
);
3656 v
= gimple_assign_rhs1 (def_stmt
);
3659 t
= gimple_assign_rhs1 (def_stmt
);
3660 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3661 || (TYPE_PRECISION (TREE_TYPE (v
))
3662 < TYPE_PRECISION (TREE_TYPE (t
))))
3671 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3673 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3674 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3679 arginfo
->linear_step
= linear_step
;
3681 arginfo
->simd_lane_linear
= true;
3687 /* Return the number of elements in vector type VECTYPE, which is associated
3688 with a SIMD clone. At present these vectors always have a constant
3691 static unsigned HOST_WIDE_INT
3692 simd_clone_subparts (tree vectype
)
3694 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3697 /* Function vectorizable_simd_clone_call.
3699 Check if STMT performs a function call that can be vectorized
3700 by calling a simd clone of the function.
3701 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3702 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3703 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3706 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3707 gimple
**vec_stmt
, slp_tree slp_node
,
3708 stmt_vector_for_cost
*)
3713 tree vec_oprnd0
= NULL_TREE
;
3714 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3716 unsigned int nunits
;
3717 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3718 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3719 vec_info
*vinfo
= stmt_info
->vinfo
;
3720 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3721 tree fndecl
, new_temp
;
3722 gimple
*new_stmt
= NULL
;
3724 auto_vec
<simd_call_arg_info
> arginfo
;
3725 vec
<tree
> vargs
= vNULL
;
3727 tree lhs
, rtype
, ratype
;
3728 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3730 /* Is STMT a vectorizable call? */
3731 if (!is_gimple_call (stmt
))
3734 fndecl
= gimple_call_fndecl (stmt
);
3735 if (fndecl
== NULL_TREE
)
3738 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3739 if (node
== NULL
|| node
->simd_clones
== NULL
)
3742 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3745 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3749 if (gimple_call_lhs (stmt
)
3750 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3753 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
3755 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3757 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
3764 /* Process function arguments. */
3765 nargs
= gimple_call_num_args (stmt
);
3767 /* Bail out if the function has zero arguments. */
3771 arginfo
.reserve (nargs
, true);
3773 for (i
= 0; i
< nargs
; i
++)
3775 simd_call_arg_info thisarginfo
;
3778 thisarginfo
.linear_step
= 0;
3779 thisarginfo
.align
= 0;
3780 thisarginfo
.op
= NULL_TREE
;
3781 thisarginfo
.simd_lane_linear
= false;
3783 op
= gimple_call_arg (stmt
, i
);
3784 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3785 &thisarginfo
.vectype
)
3786 || thisarginfo
.dt
== vect_uninitialized_def
)
3788 if (dump_enabled_p ())
3789 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3790 "use not simple.\n");
3794 if (thisarginfo
.dt
== vect_constant_def
3795 || thisarginfo
.dt
== vect_external_def
)
3796 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3798 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3800 /* For linear arguments, the analyze phase should have saved
3801 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3802 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3803 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3805 gcc_assert (vec_stmt
);
3806 thisarginfo
.linear_step
3807 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3809 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3810 thisarginfo
.simd_lane_linear
3811 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3812 == boolean_true_node
);
3813 /* If loop has been peeled for alignment, we need to adjust it. */
3814 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3815 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3816 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3818 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3819 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3820 tree opt
= TREE_TYPE (thisarginfo
.op
);
3821 bias
= fold_convert (TREE_TYPE (step
), bias
);
3822 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3824 = fold_build2 (POINTER_TYPE_P (opt
)
3825 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3826 thisarginfo
.op
, bias
);
3830 && thisarginfo
.dt
!= vect_constant_def
3831 && thisarginfo
.dt
!= vect_external_def
3833 && TREE_CODE (op
) == SSA_NAME
3834 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3836 && tree_fits_shwi_p (iv
.step
))
3838 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3839 thisarginfo
.op
= iv
.base
;
3841 else if ((thisarginfo
.dt
== vect_constant_def
3842 || thisarginfo
.dt
== vect_external_def
)
3843 && POINTER_TYPE_P (TREE_TYPE (op
)))
3844 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3845 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3847 if (POINTER_TYPE_P (TREE_TYPE (op
))
3848 && !thisarginfo
.linear_step
3850 && thisarginfo
.dt
!= vect_constant_def
3851 && thisarginfo
.dt
!= vect_external_def
3854 && TREE_CODE (op
) == SSA_NAME
)
3855 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3857 arginfo
.quick_push (thisarginfo
);
3860 unsigned HOST_WIDE_INT vf
;
3861 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3863 if (dump_enabled_p ())
3864 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3865 "not considering SIMD clones; not yet supported"
3866 " for variable-width vectors.\n");
3870 unsigned int badness
= 0;
3871 struct cgraph_node
*bestn
= NULL
;
3872 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3873 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3875 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3876 n
= n
->simdclone
->next_clone
)
3878 unsigned int this_badness
= 0;
3879 if (n
->simdclone
->simdlen
> vf
3880 || n
->simdclone
->nargs
!= nargs
)
3882 if (n
->simdclone
->simdlen
< vf
)
3883 this_badness
+= (exact_log2 (vf
)
3884 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3885 if (n
->simdclone
->inbranch
)
3886 this_badness
+= 2048;
3887 int target_badness
= targetm
.simd_clone
.usable (n
);
3888 if (target_badness
< 0)
3890 this_badness
+= target_badness
* 512;
3891 /* FORNOW: Have to add code to add the mask argument. */
3892 if (n
->simdclone
->inbranch
)
3894 for (i
= 0; i
< nargs
; i
++)
3896 switch (n
->simdclone
->args
[i
].arg_type
)
3898 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3899 if (!useless_type_conversion_p
3900 (n
->simdclone
->args
[i
].orig_type
,
3901 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3903 else if (arginfo
[i
].dt
== vect_constant_def
3904 || arginfo
[i
].dt
== vect_external_def
3905 || arginfo
[i
].linear_step
)
3908 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3909 if (arginfo
[i
].dt
!= vect_constant_def
3910 && arginfo
[i
].dt
!= vect_external_def
)
3913 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3914 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3915 if (arginfo
[i
].dt
== vect_constant_def
3916 || arginfo
[i
].dt
== vect_external_def
3917 || (arginfo
[i
].linear_step
3918 != n
->simdclone
->args
[i
].linear_step
))
3921 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3922 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3930 case SIMD_CLONE_ARG_TYPE_MASK
:
3933 if (i
== (size_t) -1)
3935 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3940 if (arginfo
[i
].align
)
3941 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3942 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3944 if (i
== (size_t) -1)
3946 if (bestn
== NULL
|| this_badness
< badness
)
3949 badness
= this_badness
;
3956 for (i
= 0; i
< nargs
; i
++)
3957 if ((arginfo
[i
].dt
== vect_constant_def
3958 || arginfo
[i
].dt
== vect_external_def
)
3959 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3962 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3964 if (arginfo
[i
].vectype
== NULL
3965 || (simd_clone_subparts (arginfo
[i
].vectype
)
3966 > bestn
->simdclone
->simdlen
))
3970 fndecl
= bestn
->decl
;
3971 nunits
= bestn
->simdclone
->simdlen
;
3972 ncopies
= vf
/ nunits
;
3974 /* If the function isn't const, only allow it in simd loops where user
3975 has asserted that at least nunits consecutive iterations can be
3976 performed using SIMD instructions. */
3977 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3978 && gimple_vuse (stmt
))
3981 /* Sanity check: make sure that at least one copy of the vectorized stmt
3982 needs to be generated. */
3983 gcc_assert (ncopies
>= 1);
3985 if (!vec_stmt
) /* transformation not required. */
3987 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3988 for (i
= 0; i
< nargs
; i
++)
3989 if ((bestn
->simdclone
->args
[i
].arg_type
3990 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3991 || (bestn
->simdclone
->args
[i
].arg_type
3992 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3994 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3996 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3997 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3998 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3999 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4001 tree sll
= arginfo
[i
].simd_lane_linear
4002 ? boolean_true_node
: boolean_false_node
;
4003 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4005 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4006 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4007 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4013 if (dump_enabled_p ())
4014 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4017 scalar_dest
= gimple_call_lhs (stmt
);
4018 vec_dest
= NULL_TREE
;
4023 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4024 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4025 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4028 rtype
= TREE_TYPE (ratype
);
4032 prev_stmt_info
= NULL
;
4033 for (j
= 0; j
< ncopies
; ++j
)
4035 /* Build argument list for the vectorized call. */
4037 vargs
.create (nargs
);
4041 for (i
= 0; i
< nargs
; i
++)
4043 unsigned int k
, l
, m
, o
;
4045 op
= gimple_call_arg (stmt
, i
);
4046 switch (bestn
->simdclone
->args
[i
].arg_type
)
4048 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4049 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4050 o
= nunits
/ simd_clone_subparts (atype
);
4051 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4053 if (simd_clone_subparts (atype
)
4054 < simd_clone_subparts (arginfo
[i
].vectype
))
4056 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4057 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4058 / simd_clone_subparts (atype
));
4059 gcc_assert ((k
& (k
- 1)) == 0);
4062 = vect_get_vec_def_for_operand (op
, stmt
);
4065 vec_oprnd0
= arginfo
[i
].op
;
4066 if ((m
& (k
- 1)) == 0)
4068 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
4071 arginfo
[i
].op
= vec_oprnd0
;
4073 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4075 bitsize_int ((m
& (k
- 1)) * prec
));
4077 = gimple_build_assign (make_ssa_name (atype
),
4079 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4080 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4084 k
= (simd_clone_subparts (atype
)
4085 / simd_clone_subparts (arginfo
[i
].vectype
));
4086 gcc_assert ((k
& (k
- 1)) == 0);
4087 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4089 vec_alloc (ctor_elts
, k
);
4092 for (l
= 0; l
< k
; l
++)
4094 if (m
== 0 && l
== 0)
4096 = vect_get_vec_def_for_operand (op
, stmt
);
4099 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
4101 arginfo
[i
].op
= vec_oprnd0
;
4104 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4108 vargs
.safe_push (vec_oprnd0
);
4111 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4113 = gimple_build_assign (make_ssa_name (atype
),
4115 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4116 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4121 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4122 vargs
.safe_push (op
);
4124 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4125 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4130 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4135 edge pe
= loop_preheader_edge (loop
);
4136 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4137 gcc_assert (!new_bb
);
4139 if (arginfo
[i
].simd_lane_linear
)
4141 vargs
.safe_push (arginfo
[i
].op
);
4144 tree phi_res
= copy_ssa_name (op
);
4145 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4146 set_vinfo_for_stmt (new_phi
,
4147 new_stmt_vec_info (new_phi
, loop_vinfo
));
4148 add_phi_arg (new_phi
, arginfo
[i
].op
,
4149 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4151 = POINTER_TYPE_P (TREE_TYPE (op
))
4152 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4153 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4154 ? sizetype
: TREE_TYPE (op
);
4156 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4158 tree tcst
= wide_int_to_tree (type
, cst
);
4159 tree phi_arg
= copy_ssa_name (op
);
4161 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4162 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4163 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4164 set_vinfo_for_stmt (new_stmt
,
4165 new_stmt_vec_info (new_stmt
, loop_vinfo
));
4166 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4168 arginfo
[i
].op
= phi_res
;
4169 vargs
.safe_push (phi_res
);
4174 = POINTER_TYPE_P (TREE_TYPE (op
))
4175 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4176 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4177 ? sizetype
: TREE_TYPE (op
);
4179 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4181 tree tcst
= wide_int_to_tree (type
, cst
);
4182 new_temp
= make_ssa_name (TREE_TYPE (op
));
4183 new_stmt
= gimple_build_assign (new_temp
, code
,
4184 arginfo
[i
].op
, tcst
);
4185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4186 vargs
.safe_push (new_temp
);
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4200 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
4203 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4205 new_temp
= create_tmp_var (ratype
);
4206 else if (simd_clone_subparts (vectype
)
4207 == simd_clone_subparts (rtype
))
4208 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4210 new_temp
= make_ssa_name (rtype
, new_stmt
);
4211 gimple_call_set_lhs (new_stmt
, new_temp
);
4213 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4217 if (simd_clone_subparts (vectype
) < nunits
)
4220 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4221 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4222 k
= nunits
/ simd_clone_subparts (vectype
);
4223 gcc_assert ((k
& (k
- 1)) == 0);
4224 for (l
= 0; l
< k
; l
++)
4229 t
= build_fold_addr_expr (new_temp
);
4230 t
= build2 (MEM_REF
, vectype
, t
,
4231 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4234 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4235 bitsize_int (prec
), bitsize_int (l
* prec
));
4237 = gimple_build_assign (make_ssa_name (vectype
), t
);
4238 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4239 if (j
== 0 && l
== 0)
4240 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4242 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4244 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4248 vect_clobber_variable (stmt
, gsi
, new_temp
);
4251 else if (simd_clone_subparts (vectype
) > nunits
)
4253 unsigned int k
= (simd_clone_subparts (vectype
)
4254 / simd_clone_subparts (rtype
));
4255 gcc_assert ((k
& (k
- 1)) == 0);
4256 if ((j
& (k
- 1)) == 0)
4257 vec_alloc (ret_ctor_elts
, k
);
4260 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4261 for (m
= 0; m
< o
; m
++)
4263 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4264 size_int (m
), NULL_TREE
, NULL_TREE
);
4266 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4267 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4268 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4269 gimple_assign_lhs (new_stmt
));
4271 vect_clobber_variable (stmt
, gsi
, new_temp
);
4274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4275 if ((j
& (k
- 1)) != k
- 1)
4277 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4279 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4280 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4282 if ((unsigned) j
== k
- 1)
4283 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4285 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4287 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4292 tree t
= build_fold_addr_expr (new_temp
);
4293 t
= build2 (MEM_REF
, vectype
, t
,
4294 build_int_cst (TREE_TYPE (t
), 0));
4296 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4297 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4298 vect_clobber_variable (stmt
, gsi
, new_temp
);
4303 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4305 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4307 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4312 /* The call in STMT might prevent it from being removed in dce.
4313 We however cannot remove it here, due to the way the ssa name
4314 it defines is mapped to the new definition. So just replace
4315 rhs of the statement with something harmless. */
4322 type
= TREE_TYPE (scalar_dest
);
4323 if (is_pattern_stmt_p (stmt_info
))
4324 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
4326 lhs
= gimple_call_lhs (stmt
);
4327 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4330 new_stmt
= gimple_build_nop ();
4331 set_vinfo_for_stmt (new_stmt
, stmt_info
);
4332 set_vinfo_for_stmt (stmt
, NULL
);
4333 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
4334 gsi_replace (gsi
, new_stmt
, true);
4335 unlink_stmt_vdef (stmt
);
4341 /* Function vect_gen_widened_results_half
4343 Create a vector stmt whose code, type, number of arguments, and result
4344 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4345 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4346 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4347 needs to be created (DECL is a function-decl of a target-builtin).
4348 STMT is the original scalar stmt that we are vectorizing. */
4351 vect_gen_widened_results_half (enum tree_code code
,
4353 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4354 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4360 /* Generate half of the widened result: */
4361 if (code
== CALL_EXPR
)
4363 /* Target specific support */
4364 if (op_type
== binary_op
)
4365 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4367 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4368 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4369 gimple_call_set_lhs (new_stmt
, new_temp
);
4373 /* Generic support */
4374 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4375 if (op_type
!= binary_op
)
4377 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4378 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4379 gimple_assign_set_lhs (new_stmt
, new_temp
);
4381 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4387 /* Get vectorized definitions for loop-based vectorization. For the first
4388 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4389 scalar operand), and for the rest we get a copy with
4390 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4391 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4392 The vectors are collected into VEC_OPRNDS. */
4395 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
4396 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4400 /* Get first vector operand. */
4401 /* All the vector operands except the very first one (that is scalar oprnd)
4403 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4404 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
4406 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
4408 vec_oprnds
->quick_push (vec_oprnd
);
4410 /* Get second vector operand. */
4411 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
4412 vec_oprnds
->quick_push (vec_oprnd
);
4416 /* For conversion in multiple steps, continue to get operands
4419 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
4423 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4424 For multi-step conversions store the resulting vectors and call the function
4428 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4429 int multi_step_cvt
, gimple
*stmt
,
4431 gimple_stmt_iterator
*gsi
,
4432 slp_tree slp_node
, enum tree_code code
,
4433 stmt_vec_info
*prev_stmt_info
)
4436 tree vop0
, vop1
, new_tmp
, vec_dest
;
4438 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4440 vec_dest
= vec_dsts
.pop ();
4442 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4444 /* Create demotion operation. */
4445 vop0
= (*vec_oprnds
)[i
];
4446 vop1
= (*vec_oprnds
)[i
+ 1];
4447 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4448 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4449 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4450 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4453 /* Store the resulting vector for next recursive call. */
4454 (*vec_oprnds
)[i
/2] = new_tmp
;
4457 /* This is the last step of the conversion sequence. Store the
4458 vectors in SLP_NODE or in vector info of the scalar statement
4459 (or in STMT_VINFO_RELATED_STMT chain). */
4461 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4464 if (!*prev_stmt_info
)
4465 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4467 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
4469 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4474 /* For multi-step demotion operations we first generate demotion operations
4475 from the source type to the intermediate types, and then combine the
4476 results (stored in VEC_OPRNDS) in demotion operation to the destination
4480 /* At each level of recursion we have half of the operands we had at the
4482 vec_oprnds
->truncate ((i
+1)/2);
4483 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4484 stmt
, vec_dsts
, gsi
, slp_node
,
4485 VEC_PACK_TRUNC_EXPR
,
4489 vec_dsts
.quick_push (vec_dest
);
4493 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4494 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4495 the resulting vectors and call the function recursively. */
4498 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4499 vec
<tree
> *vec_oprnds1
,
4500 gimple
*stmt
, tree vec_dest
,
4501 gimple_stmt_iterator
*gsi
,
4502 enum tree_code code1
,
4503 enum tree_code code2
, tree decl1
,
4504 tree decl2
, int op_type
)
4507 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4508 gimple
*new_stmt1
, *new_stmt2
;
4509 vec
<tree
> vec_tmp
= vNULL
;
4511 vec_tmp
.create (vec_oprnds0
->length () * 2);
4512 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4514 if (op_type
== binary_op
)
4515 vop1
= (*vec_oprnds1
)[i
];
4519 /* Generate the two halves of promotion operation. */
4520 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4521 op_type
, vec_dest
, gsi
, stmt
);
4522 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4523 op_type
, vec_dest
, gsi
, stmt
);
4524 if (is_gimple_call (new_stmt1
))
4526 new_tmp1
= gimple_call_lhs (new_stmt1
);
4527 new_tmp2
= gimple_call_lhs (new_stmt2
);
4531 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4532 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4535 /* Store the results for the next step. */
4536 vec_tmp
.quick_push (new_tmp1
);
4537 vec_tmp
.quick_push (new_tmp2
);
4540 vec_oprnds0
->release ();
4541 *vec_oprnds0
= vec_tmp
;
4545 /* Check if STMT performs a conversion operation, that can be vectorized.
4546 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4547 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4551 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4552 gimple
**vec_stmt
, slp_tree slp_node
,
4553 stmt_vector_for_cost
*cost_vec
)
4557 tree op0
, op1
= NULL_TREE
;
4558 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4559 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4560 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4561 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4562 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4563 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4565 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4567 gimple
*new_stmt
= NULL
;
4568 stmt_vec_info prev_stmt_info
;
4569 poly_uint64 nunits_in
;
4570 poly_uint64 nunits_out
;
4571 tree vectype_out
, vectype_in
;
4573 tree lhs_type
, rhs_type
;
4574 enum { NARROW
, NONE
, WIDEN
} modifier
;
4575 vec
<tree
> vec_oprnds0
= vNULL
;
4576 vec
<tree
> vec_oprnds1
= vNULL
;
4578 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4579 vec_info
*vinfo
= stmt_info
->vinfo
;
4580 int multi_step_cvt
= 0;
4581 vec
<tree
> interm_types
= vNULL
;
4582 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4584 unsigned short fltsz
;
4586 /* Is STMT a vectorizable conversion? */
4588 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4591 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4595 if (!is_gimple_assign (stmt
))
4598 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4601 code
= gimple_assign_rhs_code (stmt
);
4602 if (!CONVERT_EXPR_CODE_P (code
)
4603 && code
!= FIX_TRUNC_EXPR
4604 && code
!= FLOAT_EXPR
4605 && code
!= WIDEN_MULT_EXPR
4606 && code
!= WIDEN_LSHIFT_EXPR
)
4609 op_type
= TREE_CODE_LENGTH (code
);
4611 /* Check types of lhs and rhs. */
4612 scalar_dest
= gimple_assign_lhs (stmt
);
4613 lhs_type
= TREE_TYPE (scalar_dest
);
4614 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4616 op0
= gimple_assign_rhs1 (stmt
);
4617 rhs_type
= TREE_TYPE (op0
);
4619 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4620 && !((INTEGRAL_TYPE_P (lhs_type
)
4621 && INTEGRAL_TYPE_P (rhs_type
))
4622 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4623 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4626 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4627 && ((INTEGRAL_TYPE_P (lhs_type
)
4628 && !type_has_mode_precision_p (lhs_type
))
4629 || (INTEGRAL_TYPE_P (rhs_type
)
4630 && !type_has_mode_precision_p (rhs_type
))))
4632 if (dump_enabled_p ())
4633 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4634 "type conversion to/from bit-precision unsupported."
4639 /* Check the operands of the operation. */
4640 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4644 "use not simple.\n");
4647 if (op_type
== binary_op
)
4651 op1
= gimple_assign_rhs2 (stmt
);
4652 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4653 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4655 if (CONSTANT_CLASS_P (op0
))
4656 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4658 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4664 "use not simple.\n");
4669 /* If op0 is an external or constant defs use a vector type of
4670 the same size as the output vector type. */
4672 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4674 gcc_assert (vectype_in
);
4677 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4680 "no vectype for scalar type ");
4681 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4682 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4688 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4689 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4691 if (dump_enabled_p ())
4693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4694 "can't convert between boolean and non "
4696 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
4697 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4703 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4704 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4705 if (known_eq (nunits_out
, nunits_in
))
4707 else if (multiple_p (nunits_out
, nunits_in
))
4711 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4715 /* Multiple types in SLP are handled by creating the appropriate number of
4716 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4720 else if (modifier
== NARROW
)
4721 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4723 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4725 /* Sanity check: make sure that at least one copy of the vectorized stmt
4726 needs to be generated. */
4727 gcc_assert (ncopies
>= 1);
4729 bool found_mode
= false;
4730 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4731 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4732 opt_scalar_mode rhs_mode_iter
;
4734 /* Supportable by target? */
4738 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4740 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4747 "conversion not supported by target.\n");
4751 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
4752 &code1
, &code2
, &multi_step_cvt
,
4755 /* Binary widening operation can only be supported directly by the
4757 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4761 if (code
!= FLOAT_EXPR
4762 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4765 fltsz
= GET_MODE_SIZE (lhs_mode
);
4766 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4768 rhs_mode
= rhs_mode_iter
.require ();
4769 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4773 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4774 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4775 if (cvt_type
== NULL_TREE
)
4778 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4780 if (!supportable_convert_operation (code
, vectype_out
,
4781 cvt_type
, &decl1
, &codecvt1
))
4784 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
4785 cvt_type
, &codecvt1
,
4786 &codecvt2
, &multi_step_cvt
,
4790 gcc_assert (multi_step_cvt
== 0);
4792 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
4793 vectype_in
, &code1
, &code2
,
4794 &multi_step_cvt
, &interm_types
))
4804 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4805 codecvt2
= ERROR_MARK
;
4809 interm_types
.safe_push (cvt_type
);
4810 cvt_type
= NULL_TREE
;
4815 gcc_assert (op_type
== unary_op
);
4816 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4817 &code1
, &multi_step_cvt
,
4821 if (code
!= FIX_TRUNC_EXPR
4822 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4826 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4827 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4828 if (cvt_type
== NULL_TREE
)
4830 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4833 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4834 &code1
, &multi_step_cvt
,
4843 if (!vec_stmt
) /* transformation not required. */
4845 DUMP_VECT_SCOPE ("vectorizable_conversion");
4846 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4848 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4849 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4852 else if (modifier
== NARROW
)
4854 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4855 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4860 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4861 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4864 interm_types
.release ();
4869 if (dump_enabled_p ())
4870 dump_printf_loc (MSG_NOTE
, vect_location
,
4871 "transform conversion. ncopies = %d.\n", ncopies
);
4873 if (op_type
== binary_op
)
4875 if (CONSTANT_CLASS_P (op0
))
4876 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4877 else if (CONSTANT_CLASS_P (op1
))
4878 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4881 /* In case of multi-step conversion, we first generate conversion operations
4882 to the intermediate types, and then from that types to the final one.
4883 We create vector destinations for the intermediate type (TYPES) received
4884 from supportable_*_operation, and store them in the correct order
4885 for future use in vect_create_vectorized_*_stmts (). */
4886 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4887 vec_dest
= vect_create_destination_var (scalar_dest
,
4888 (cvt_type
&& modifier
== WIDEN
)
4889 ? cvt_type
: vectype_out
);
4890 vec_dsts
.quick_push (vec_dest
);
4894 for (i
= interm_types
.length () - 1;
4895 interm_types
.iterate (i
, &intermediate_type
); i
--)
4897 vec_dest
= vect_create_destination_var (scalar_dest
,
4899 vec_dsts
.quick_push (vec_dest
);
4904 vec_dest
= vect_create_destination_var (scalar_dest
,
4906 ? vectype_out
: cvt_type
);
4910 if (modifier
== WIDEN
)
4912 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4913 if (op_type
== binary_op
)
4914 vec_oprnds1
.create (1);
4916 else if (modifier
== NARROW
)
4917 vec_oprnds0
.create (
4918 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4920 else if (code
== WIDEN_LSHIFT_EXPR
)
4921 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4924 prev_stmt_info
= NULL
;
4928 for (j
= 0; j
< ncopies
; j
++)
4931 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
);
4933 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4935 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4937 /* Arguments are ready, create the new vector stmt. */
4938 if (code1
== CALL_EXPR
)
4940 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4941 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4942 gimple_call_set_lhs (new_stmt
, new_temp
);
4946 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4947 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4948 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4949 gimple_assign_set_lhs (new_stmt
, new_temp
);
4952 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4954 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4957 if (!prev_stmt_info
)
4958 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4960 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4961 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4968 /* In case the vectorization factor (VF) is bigger than the number
4969 of elements that we can fit in a vectype (nunits), we have to
4970 generate more than one vector stmt - i.e - we need to "unroll"
4971 the vector stmt by a factor VF/nunits. */
4972 for (j
= 0; j
< ncopies
; j
++)
4979 if (code
== WIDEN_LSHIFT_EXPR
)
4984 /* Store vec_oprnd1 for every vector stmt to be created
4985 for SLP_NODE. We check during the analysis that all
4986 the shift arguments are the same. */
4987 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4988 vec_oprnds1
.quick_push (vec_oprnd1
);
4990 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4994 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4995 &vec_oprnds1
, slp_node
);
4999 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
5000 vec_oprnds0
.quick_push (vec_oprnd0
);
5001 if (op_type
== binary_op
)
5003 if (code
== WIDEN_LSHIFT_EXPR
)
5006 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
5007 vec_oprnds1
.quick_push (vec_oprnd1
);
5013 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
5014 vec_oprnds0
.truncate (0);
5015 vec_oprnds0
.quick_push (vec_oprnd0
);
5016 if (op_type
== binary_op
)
5018 if (code
== WIDEN_LSHIFT_EXPR
)
5021 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
5023 vec_oprnds1
.truncate (0);
5024 vec_oprnds1
.quick_push (vec_oprnd1
);
5028 /* Arguments are ready. Create the new vector stmts. */
5029 for (i
= multi_step_cvt
; i
>= 0; i
--)
5031 tree this_dest
= vec_dsts
[i
];
5032 enum tree_code c1
= code1
, c2
= code2
;
5033 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5038 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5040 stmt
, this_dest
, gsi
,
5041 c1
, c2
, decl1
, decl2
,
5045 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5049 if (codecvt1
== CALL_EXPR
)
5051 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5052 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5053 gimple_call_set_lhs (new_stmt
, new_temp
);
5057 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5058 new_temp
= make_ssa_name (vec_dest
);
5059 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
5063 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5066 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
5069 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5072 if (!prev_stmt_info
)
5073 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
5075 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5076 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5081 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5085 /* In case the vectorization factor (VF) is bigger than the number
5086 of elements that we can fit in a vectype (nunits), we have to
5087 generate more than one vector stmt - i.e - we need to "unroll"
5088 the vector stmt by a factor VF/nunits. */
5089 for (j
= 0; j
< ncopies
; j
++)
5093 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5097 vec_oprnds0
.truncate (0);
5098 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
5099 vect_pow2 (multi_step_cvt
) - 1);
5102 /* Arguments are ready. Create the new vector stmts. */
5104 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5106 if (codecvt1
== CALL_EXPR
)
5108 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5109 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5110 gimple_call_set_lhs (new_stmt
, new_temp
);
5114 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5115 new_temp
= make_ssa_name (vec_dest
);
5116 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
5120 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5121 vec_oprnds0
[i
] = new_temp
;
5124 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5125 stmt
, vec_dsts
, gsi
,
5130 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5134 vec_oprnds0
.release ();
5135 vec_oprnds1
.release ();
5136 interm_types
.release ();
5142 /* Function vectorizable_assignment.
5144 Check if STMT performs an assignment (copy) that can be vectorized.
5145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5150 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5151 gimple
**vec_stmt
, slp_tree slp_node
,
5152 stmt_vector_for_cost
*cost_vec
)
5157 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5158 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5160 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5164 vec
<tree
> vec_oprnds
= vNULL
;
5166 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5167 vec_info
*vinfo
= stmt_info
->vinfo
;
5168 gimple
*new_stmt
= NULL
;
5169 stmt_vec_info prev_stmt_info
= NULL
;
5170 enum tree_code code
;
5173 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5176 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5180 /* Is vectorizable assignment? */
5181 if (!is_gimple_assign (stmt
))
5184 scalar_dest
= gimple_assign_lhs (stmt
);
5185 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5188 code
= gimple_assign_rhs_code (stmt
);
5189 if (gimple_assign_single_p (stmt
)
5190 || code
== PAREN_EXPR
5191 || CONVERT_EXPR_CODE_P (code
))
5192 op
= gimple_assign_rhs1 (stmt
);
5196 if (code
== VIEW_CONVERT_EXPR
)
5197 op
= TREE_OPERAND (op
, 0);
5199 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5200 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5202 /* Multiple types in SLP are handled by creating the appropriate number of
5203 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5208 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5210 gcc_assert (ncopies
>= 1);
5212 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5214 if (dump_enabled_p ())
5215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5216 "use not simple.\n");
5220 /* We can handle NOP_EXPR conversions that do not change the number
5221 of elements or the vector size. */
5222 if ((CONVERT_EXPR_CODE_P (code
)
5223 || code
== VIEW_CONVERT_EXPR
)
5225 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5226 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5227 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5230 /* We do not handle bit-precision changes. */
5231 if ((CONVERT_EXPR_CODE_P (code
)
5232 || code
== VIEW_CONVERT_EXPR
)
5233 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5234 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5235 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5236 /* But a conversion that does not change the bit-pattern is ok. */
5237 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5238 > TYPE_PRECISION (TREE_TYPE (op
)))
5239 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5240 /* Conversion between boolean types of different sizes is
5241 a simple assignment in case their vectypes are same
5243 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5244 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5246 if (dump_enabled_p ())
5247 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5248 "type conversion to/from bit-precision "
5253 if (!vec_stmt
) /* transformation not required. */
5255 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5256 DUMP_VECT_SCOPE ("vectorizable_assignment");
5257 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5262 if (dump_enabled_p ())
5263 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5266 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5269 for (j
= 0; j
< ncopies
; j
++)
5273 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
);
5275 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
5277 /* Arguments are ready. create the new vector stmt. */
5278 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5280 if (CONVERT_EXPR_CODE_P (code
)
5281 || code
== VIEW_CONVERT_EXPR
)
5282 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5283 new_stmt
= gimple_build_assign (vec_dest
, vop
);
5284 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5285 gimple_assign_set_lhs (new_stmt
, new_temp
);
5286 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5288 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5295 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5297 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5299 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5302 vec_oprnds
.release ();
5307 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5308 either as shift by a scalar or by a vector. */
5311 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
5314 machine_mode vec_mode
;
5319 vectype
= get_vectype_for_scalar_type (scalar_type
);
5323 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5325 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5327 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5329 || (optab_handler (optab
, TYPE_MODE (vectype
))
5330 == CODE_FOR_nothing
))
5334 vec_mode
= TYPE_MODE (vectype
);
5335 icode
= (int) optab_handler (optab
, vec_mode
);
5336 if (icode
== CODE_FOR_nothing
)
5343 /* Function vectorizable_shift.
5345 Check if STMT performs a shift operation that can be vectorized.
5346 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5347 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5348 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5351 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5352 gimple
**vec_stmt
, slp_tree slp_node
,
5353 stmt_vector_for_cost
*cost_vec
)
5357 tree op0
, op1
= NULL
;
5358 tree vec_oprnd1
= NULL_TREE
;
5359 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5361 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5362 enum tree_code code
;
5363 machine_mode vec_mode
;
5367 machine_mode optab_op2_mode
;
5368 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5370 gimple
*new_stmt
= NULL
;
5371 stmt_vec_info prev_stmt_info
;
5372 poly_uint64 nunits_in
;
5373 poly_uint64 nunits_out
;
5378 vec
<tree
> vec_oprnds0
= vNULL
;
5379 vec
<tree
> vec_oprnds1
= vNULL
;
5382 bool scalar_shift_arg
= true;
5383 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5384 vec_info
*vinfo
= stmt_info
->vinfo
;
5386 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5389 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5393 /* Is STMT a vectorizable binary/unary operation? */
5394 if (!is_gimple_assign (stmt
))
5397 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5400 code
= gimple_assign_rhs_code (stmt
);
5402 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5403 || code
== RROTATE_EXPR
))
5406 scalar_dest
= gimple_assign_lhs (stmt
);
5407 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5408 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5412 "bit-precision shifts not supported.\n");
5416 op0
= gimple_assign_rhs1 (stmt
);
5417 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5419 if (dump_enabled_p ())
5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5421 "use not simple.\n");
5424 /* If op0 is an external or constant def use a vector type with
5425 the same size as the output vector type. */
5427 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5429 gcc_assert (vectype
);
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5434 "no vectype for scalar type\n");
5438 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5439 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5440 if (maybe_ne (nunits_out
, nunits_in
))
5443 op1
= gimple_assign_rhs2 (stmt
);
5444 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
))
5446 if (dump_enabled_p ())
5447 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5448 "use not simple.\n");
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5458 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5460 gcc_assert (ncopies
>= 1);
5462 /* Determine whether the shift amount is a vector, or scalar. If the
5463 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5465 if ((dt
[1] == vect_internal_def
5466 || dt
[1] == vect_induction_def
)
5468 scalar_shift_arg
= false;
5469 else if (dt
[1] == vect_constant_def
5470 || dt
[1] == vect_external_def
5471 || dt
[1] == vect_internal_def
)
5473 /* In SLP, need to check whether the shift count is the same,
5474 in loops if it is a constant or invariant, it is always
5478 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5481 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
5482 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5483 scalar_shift_arg
= false;
5486 /* If the shift amount is computed by a pattern stmt we cannot
5487 use the scalar amount directly thus give up and use a vector
5489 if (dt
[1] == vect_internal_def
)
5491 gimple
*def
= SSA_NAME_DEF_STMT (op1
);
5492 if (is_pattern_stmt_p (vinfo_for_stmt (def
)))
5493 scalar_shift_arg
= false;
5498 if (dump_enabled_p ())
5499 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5500 "operand mode requires invariant argument.\n");
5504 /* Vector shifted by vector. */
5505 if (!scalar_shift_arg
)
5507 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5508 if (dump_enabled_p ())
5509 dump_printf_loc (MSG_NOTE
, vect_location
,
5510 "vector/vector shift/rotate found.\n");
5513 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5514 if (op1_vectype
== NULL_TREE
5515 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5519 "unusable type for last operand in"
5520 " vector/vector shift/rotate.\n");
5524 /* See if the machine has a vector shifted by scalar insn and if not
5525 then see if it has a vector shifted by vector insn. */
5528 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5530 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE
, vect_location
,
5534 "vector/scalar shift/rotate found.\n");
5538 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5540 && (optab_handler (optab
, TYPE_MODE (vectype
))
5541 != CODE_FOR_nothing
))
5543 scalar_shift_arg
= false;
5545 if (dump_enabled_p ())
5546 dump_printf_loc (MSG_NOTE
, vect_location
,
5547 "vector/vector shift/rotate found.\n");
5549 /* Unlike the other binary operators, shifts/rotates have
5550 the rhs being int, instead of the same type as the lhs,
5551 so make sure the scalar is the right type if we are
5552 dealing with vectors of long long/long/short/char. */
5553 if (dt
[1] == vect_constant_def
)
5554 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5555 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5559 && TYPE_MODE (TREE_TYPE (vectype
))
5560 != TYPE_MODE (TREE_TYPE (op1
)))
5562 if (dump_enabled_p ())
5563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5564 "unusable type for last operand in"
5565 " vector/vector shift/rotate.\n");
5568 if (vec_stmt
&& !slp_node
)
5570 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5571 op1
= vect_init_vector (stmt
, op1
,
5572 TREE_TYPE (vectype
), NULL
);
5579 /* Supportable by target? */
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5587 vec_mode
= TYPE_MODE (vectype
);
5588 icode
= (int) optab_handler (optab
, vec_mode
);
5589 if (icode
== CODE_FOR_nothing
)
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5593 "op not supported by target.\n");
5594 /* Check only during analysis. */
5595 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5597 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5599 if (dump_enabled_p ())
5600 dump_printf_loc (MSG_NOTE
, vect_location
,
5601 "proceeding using word mode.\n");
5604 /* Worthwhile without SIMD support? Check only during analysis. */
5606 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5607 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5609 if (dump_enabled_p ())
5610 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5611 "not worthwhile without SIMD support.\n");
5615 if (!vec_stmt
) /* transformation not required. */
5617 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5618 DUMP_VECT_SCOPE ("vectorizable_shift");
5619 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE
, vect_location
,
5627 "transform binary/unary operation.\n");
5630 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5632 prev_stmt_info
= NULL
;
5633 for (j
= 0; j
< ncopies
; j
++)
5638 if (scalar_shift_arg
)
5640 /* Vector shl and shr insn patterns can be defined with scalar
5641 operand 2 (shift operand). In this case, use constant or loop
5642 invariant op1 directly, without extending it to vector mode
5644 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5645 if (!VECTOR_MODE_P (optab_op2_mode
))
5647 if (dump_enabled_p ())
5648 dump_printf_loc (MSG_NOTE
, vect_location
,
5649 "operand 1 using scalar mode.\n");
5651 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5652 vec_oprnds1
.quick_push (vec_oprnd1
);
5655 /* Store vec_oprnd1 for every vector stmt to be created
5656 for SLP_NODE. We check during the analysis that all
5657 the shift arguments are the same.
5658 TODO: Allow different constants for different vector
5659 stmts generated for an SLP instance. */
5660 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5661 vec_oprnds1
.quick_push (vec_oprnd1
);
5666 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5667 (a special case for certain kind of vector shifts); otherwise,
5668 operand 1 should be of a vector type (the usual case). */
5670 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5673 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5677 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5679 /* Arguments are ready. Create the new vector stmt. */
5680 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5682 vop1
= vec_oprnds1
[i
];
5683 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5684 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5685 gimple_assign_set_lhs (new_stmt
, new_temp
);
5686 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5688 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5695 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5697 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5698 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5701 vec_oprnds0
.release ();
5702 vec_oprnds1
.release ();
5708 /* Function vectorizable_operation.
5710 Check if STMT performs a binary, unary or ternary operation that can
5712 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5713 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5717 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
5718 gimple
**vec_stmt
, slp_tree slp_node
,
5719 stmt_vector_for_cost
*cost_vec
)
5723 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5724 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5726 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5727 enum tree_code code
, orig_code
;
5728 machine_mode vec_mode
;
5732 bool target_support_p
;
5733 enum vect_def_type dt
[3]
5734 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5736 gimple
*new_stmt
= NULL
;
5737 stmt_vec_info prev_stmt_info
;
5738 poly_uint64 nunits_in
;
5739 poly_uint64 nunits_out
;
5743 vec
<tree
> vec_oprnds0
= vNULL
;
5744 vec
<tree
> vec_oprnds1
= vNULL
;
5745 vec
<tree
> vec_oprnds2
= vNULL
;
5746 tree vop0
, vop1
, vop2
;
5747 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5748 vec_info
*vinfo
= stmt_info
->vinfo
;
5750 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5753 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5757 /* Is STMT a vectorizable binary/unary operation? */
5758 if (!is_gimple_assign (stmt
))
5761 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5764 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5766 /* For pointer addition and subtraction, we should use the normal
5767 plus and minus for the vector operation. */
5768 if (code
== POINTER_PLUS_EXPR
)
5770 if (code
== POINTER_DIFF_EXPR
)
5773 /* Support only unary or binary operations. */
5774 op_type
= TREE_CODE_LENGTH (code
);
5775 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5777 if (dump_enabled_p ())
5778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5779 "num. args = %d (not unary/binary/ternary op).\n",
5784 scalar_dest
= gimple_assign_lhs (stmt
);
5785 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5787 /* Most operations cannot handle bit-precision types without extra
5789 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5790 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5791 /* Exception are bitwise binary operations. */
5792 && code
!= BIT_IOR_EXPR
5793 && code
!= BIT_XOR_EXPR
5794 && code
!= BIT_AND_EXPR
)
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5798 "bit-precision arithmetic not supported.\n");
5802 op0
= gimple_assign_rhs1 (stmt
);
5803 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5805 if (dump_enabled_p ())
5806 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5807 "use not simple.\n");
5810 /* If op0 is an external or constant def use a vector type with
5811 the same size as the output vector type. */
5814 /* For boolean type we cannot determine vectype by
5815 invariant value (don't know whether it is a vector
5816 of booleans or vector of integers). We use output
5817 vectype because operations on boolean don't change
5819 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5821 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5825 "not supported operation on bool value.\n");
5828 vectype
= vectype_out
;
5831 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5834 gcc_assert (vectype
);
5837 if (dump_enabled_p ())
5839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5840 "no vectype for scalar type ");
5841 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5843 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5849 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5850 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5851 if (maybe_ne (nunits_out
, nunits_in
))
5854 if (op_type
== binary_op
|| op_type
== ternary_op
)
5856 op1
= gimple_assign_rhs2 (stmt
);
5857 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
5859 if (dump_enabled_p ())
5860 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5861 "use not simple.\n");
5865 if (op_type
== ternary_op
)
5867 op2
= gimple_assign_rhs3 (stmt
);
5868 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
5870 if (dump_enabled_p ())
5871 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5872 "use not simple.\n");
5877 /* Multiple types in SLP are handled by creating the appropriate number of
5878 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5883 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5885 gcc_assert (ncopies
>= 1);
5887 /* Shifts are handled in vectorizable_shift (). */
5888 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5889 || code
== RROTATE_EXPR
)
5892 /* Supportable by target? */
5894 vec_mode
= TYPE_MODE (vectype
);
5895 if (code
== MULT_HIGHPART_EXPR
)
5896 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5899 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5902 if (dump_enabled_p ())
5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5907 target_support_p
= (optab_handler (optab
, vec_mode
)
5908 != CODE_FOR_nothing
);
5911 if (!target_support_p
)
5913 if (dump_enabled_p ())
5914 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5915 "op not supported by target.\n");
5916 /* Check only during analysis. */
5917 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5918 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5920 if (dump_enabled_p ())
5921 dump_printf_loc (MSG_NOTE
, vect_location
,
5922 "proceeding using word mode.\n");
5925 /* Worthwhile without SIMD support? Check only during analysis. */
5926 if (!VECTOR_MODE_P (vec_mode
)
5928 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5930 if (dump_enabled_p ())
5931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5932 "not worthwhile without SIMD support.\n");
5936 if (!vec_stmt
) /* transformation not required. */
5938 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5939 DUMP_VECT_SCOPE ("vectorizable_operation");
5940 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5946 if (dump_enabled_p ())
5947 dump_printf_loc (MSG_NOTE
, vect_location
,
5948 "transform binary/unary operation.\n");
5950 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5951 vectors with unsigned elements, but the result is signed. So, we
5952 need to compute the MINUS_EXPR into vectype temporary and
5953 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5954 tree vec_cvt_dest
= NULL_TREE
;
5955 if (orig_code
== POINTER_DIFF_EXPR
)
5957 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5958 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5962 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5964 /* In case the vectorization factor (VF) is bigger than the number
5965 of elements that we can fit in a vectype (nunits), we have to generate
5966 more than one vector stmt - i.e - we need to "unroll" the
5967 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5968 from one copy of the vector stmt to the next, in the field
5969 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5970 stages to find the correct vector defs to be used when vectorizing
5971 stmts that use the defs of the current stmt. The example below
5972 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5973 we need to create 4 vectorized stmts):
5975 before vectorization:
5976 RELATED_STMT VEC_STMT
5980 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5982 RELATED_STMT VEC_STMT
5983 VS1_0: vx0 = memref0 VS1_1 -
5984 VS1_1: vx1 = memref1 VS1_2 -
5985 VS1_2: vx2 = memref2 VS1_3 -
5986 VS1_3: vx3 = memref3 - -
5987 S1: x = load - VS1_0
5990 step2: vectorize stmt S2 (done here):
5991 To vectorize stmt S2 we first need to find the relevant vector
5992 def for the first operand 'x'. This is, as usual, obtained from
5993 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5994 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5995 relevant vector def 'vx0'. Having found 'vx0' we can generate
5996 the vector stmt VS2_0, and as usual, record it in the
5997 STMT_VINFO_VEC_STMT of stmt S2.
5998 When creating the second copy (VS2_1), we obtain the relevant vector
5999 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6000 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6001 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6002 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6003 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6004 chain of stmts and pointers:
6005 RELATED_STMT VEC_STMT
6006 VS1_0: vx0 = memref0 VS1_1 -
6007 VS1_1: vx1 = memref1 VS1_2 -
6008 VS1_2: vx2 = memref2 VS1_3 -
6009 VS1_3: vx3 = memref3 - -
6010 S1: x = load - VS1_0
6011 VS2_0: vz0 = vx0 + v1 VS2_1 -
6012 VS2_1: vz1 = vx1 + v1 VS2_2 -
6013 VS2_2: vz2 = vx2 + v1 VS2_3 -
6014 VS2_3: vz3 = vx3 + v1 - -
6015 S2: z = x + 1 - VS2_0 */
6017 prev_stmt_info
= NULL
;
6018 for (j
= 0; j
< ncopies
; j
++)
6023 if (op_type
== binary_op
)
6024 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
6026 else if (op_type
== ternary_op
)
6030 auto_vec
<tree
> ops(3);
6031 ops
.quick_push (op0
);
6032 ops
.quick_push (op1
);
6033 ops
.quick_push (op2
);
6034 auto_vec
<vec
<tree
> > vec_defs(3);
6035 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
6036 vec_oprnds0
= vec_defs
[0];
6037 vec_oprnds1
= vec_defs
[1];
6038 vec_oprnds2
= vec_defs
[2];
6042 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
6044 vect_get_vec_defs (op2
, NULL_TREE
, stmt
, &vec_oprnds2
, NULL
,
6049 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
6054 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
6055 if (op_type
== ternary_op
)
6057 tree vec_oprnd
= vec_oprnds2
.pop ();
6058 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
6063 /* Arguments are ready. Create the new vector stmt. */
6064 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6066 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6067 ? vec_oprnds1
[i
] : NULL_TREE
);
6068 vop2
= ((op_type
== ternary_op
)
6069 ? vec_oprnds2
[i
] : NULL_TREE
);
6070 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6071 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6072 gimple_assign_set_lhs (new_stmt
, new_temp
);
6073 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6076 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6077 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6079 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6080 gimple_assign_set_lhs (new_stmt
, new_temp
);
6081 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6084 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6091 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6093 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6094 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6097 vec_oprnds0
.release ();
6098 vec_oprnds1
.release ();
6099 vec_oprnds2
.release ();
6104 /* A helper function to ensure data reference DR's base alignment. */
6107 ensure_base_align (struct data_reference
*dr
)
6109 if (DR_VECT_AUX (dr
)->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6112 if (DR_VECT_AUX (dr
)->base_misaligned
)
6114 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
6116 unsigned int align_base_to
= DR_TARGET_ALIGNMENT (dr
) * BITS_PER_UNIT
;
6118 if (decl_in_symtab_p (base_decl
))
6119 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6122 SET_DECL_ALIGN (base_decl
, align_base_to
);
6123 DECL_USER_ALIGN (base_decl
) = 1;
6125 DR_VECT_AUX (dr
)->base_misaligned
= false;
6130 /* Function get_group_alias_ptr_type.
6132 Return the alias type for the group starting at FIRST_STMT. */
6135 get_group_alias_ptr_type (gimple
*first_stmt
)
6137 struct data_reference
*first_dr
, *next_dr
;
6140 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6141 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt
));
6144 next_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt
));
6145 if (get_alias_set (DR_REF (first_dr
))
6146 != get_alias_set (DR_REF (next_dr
)))
6148 if (dump_enabled_p ())
6149 dump_printf_loc (MSG_NOTE
, vect_location
,
6150 "conflicting alias set types.\n");
6151 return ptr_type_node
;
6153 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6155 return reference_alias_ptr_type (DR_REF (first_dr
));
6159 /* Function vectorizable_store.
6161 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
6163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6164 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6168 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6169 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
6173 tree vec_oprnd
= NULL_TREE
;
6174 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6175 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6177 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6178 struct loop
*loop
= NULL
;
6179 machine_mode vec_mode
;
6181 enum dr_alignment_support alignment_support_scheme
;
6182 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6183 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6184 stmt_vec_info prev_stmt_info
= NULL
;
6185 tree dataref_ptr
= NULL_TREE
;
6186 tree dataref_offset
= NULL_TREE
;
6187 gimple
*ptr_incr
= NULL
;
6190 gimple
*next_stmt
, *first_stmt
;
6192 unsigned int group_size
, i
;
6193 vec
<tree
> oprnds
= vNULL
;
6194 vec
<tree
> result_chain
= vNULL
;
6196 tree offset
= NULL_TREE
;
6197 vec
<tree
> vec_oprnds
= vNULL
;
6198 bool slp
= (slp_node
!= NULL
);
6199 unsigned int vec_num
;
6200 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6201 vec_info
*vinfo
= stmt_info
->vinfo
;
6203 gather_scatter_info gs_info
;
6206 vec_load_store_type vls_type
;
6209 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6212 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6216 /* Is vectorizable store? */
6218 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6219 if (is_gimple_assign (stmt
))
6221 tree scalar_dest
= gimple_assign_lhs (stmt
);
6222 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6223 && is_pattern_stmt_p (stmt_info
))
6224 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6225 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6226 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6227 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6228 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6229 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6230 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6231 && TREE_CODE (scalar_dest
) != MEM_REF
)
6236 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
6237 if (!call
|| !gimple_call_internal_p (call
))
6240 internal_fn ifn
= gimple_call_internal_fn (call
);
6241 if (!internal_store_fn_p (ifn
))
6244 if (slp_node
!= NULL
)
6246 if (dump_enabled_p ())
6247 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6248 "SLP of masked stores not supported.\n");
6252 int mask_index
= internal_fn_mask_index (ifn
);
6253 if (mask_index
>= 0)
6255 mask
= gimple_call_arg (call
, mask_index
);
6256 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
6262 op
= vect_get_store_rhs (stmt
);
6264 /* Cannot have hybrid store SLP -- that would mean storing to the
6265 same location twice. */
6266 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6268 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6269 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6273 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6274 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6279 /* Multiple types in SLP are handled by creating the appropriate number of
6280 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6285 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6287 gcc_assert (ncopies
>= 1);
6289 /* FORNOW. This restriction should be relaxed. */
6290 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
6292 if (dump_enabled_p ())
6293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6294 "multiple types in nested loop.\n");
6298 if (!vect_check_store_rhs (stmt
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6301 elem_type
= TREE_TYPE (vectype
);
6302 vec_mode
= TYPE_MODE (vectype
);
6304 if (!STMT_VINFO_DATA_REF (stmt_info
))
6307 vect_memory_access_type memory_access_type
;
6308 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, vls_type
, ncopies
,
6309 &memory_access_type
, &gs_info
))
6314 if (memory_access_type
== VMAT_CONTIGUOUS
)
6316 if (!VECTOR_MODE_P (vec_mode
)
6317 || !can_vec_mask_load_store_p (vec_mode
,
6318 TYPE_MODE (mask_vectype
), false))
6321 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6322 && (memory_access_type
!= VMAT_GATHER_SCATTER
|| gs_info
.decl
))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6326 "unsupported access type for masked store.\n");
6332 /* FORNOW. In some cases can vectorize even if data-type not supported
6333 (e.g. - array initialization with 0). */
6334 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6338 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6339 && memory_access_type
!= VMAT_GATHER_SCATTER
6340 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6343 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6344 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6345 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6351 group_size
= vec_num
= 1;
6354 if (!vec_stmt
) /* transformation not required. */
6356 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6359 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6360 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6361 memory_access_type
, &gs_info
);
6363 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6364 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
6365 vls_type
, slp_node
, cost_vec
);
6368 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6372 ensure_base_align (dr
);
6374 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6376 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6377 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6378 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6379 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
6380 edge pe
= loop_preheader_edge (loop
);
6383 enum { NARROW
, NONE
, WIDEN
} modifier
;
6384 poly_uint64 scatter_off_nunits
6385 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6387 if (known_eq (nunits
, scatter_off_nunits
))
6389 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6393 /* Currently gathers and scatters are only supported for
6394 fixed-length vectors. */
6395 unsigned int count
= scatter_off_nunits
.to_constant ();
6396 vec_perm_builder
sel (count
, count
, 1);
6397 for (i
= 0; i
< (unsigned int) count
; ++i
)
6398 sel
.quick_push (i
| (count
/ 2));
6400 vec_perm_indices
indices (sel
, 1, count
);
6401 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6403 gcc_assert (perm_mask
!= NULL_TREE
);
6405 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6409 /* Currently gathers and scatters are only supported for
6410 fixed-length vectors. */
6411 unsigned int count
= nunits
.to_constant ();
6412 vec_perm_builder
sel (count
, count
, 1);
6413 for (i
= 0; i
< (unsigned int) count
; ++i
)
6414 sel
.quick_push (i
| (count
/ 2));
6416 vec_perm_indices
indices (sel
, 2, count
);
6417 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6418 gcc_assert (perm_mask
!= NULL_TREE
);
6424 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6425 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6426 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6427 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6428 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6429 scaletype
= TREE_VALUE (arglist
);
6431 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6432 && TREE_CODE (rettype
) == VOID_TYPE
);
6434 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6435 if (!is_gimple_min_invariant (ptr
))
6437 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6438 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6439 gcc_assert (!new_bb
);
6442 /* Currently we support only unconditional scatter stores,
6443 so mask should be all ones. */
6444 mask
= build_int_cst (masktype
, -1);
6445 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6447 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6449 prev_stmt_info
= NULL
;
6450 for (j
= 0; j
< ncopies
; ++j
)
6455 = vect_get_vec_def_for_operand (op
, stmt
);
6457 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6459 else if (modifier
!= NONE
&& (j
& 1))
6461 if (modifier
== WIDEN
)
6464 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6465 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6468 else if (modifier
== NARROW
)
6470 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6473 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6482 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6484 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6488 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6490 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6491 TYPE_VECTOR_SUBPARTS (srctype
)));
6492 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6493 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6494 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6495 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6499 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6501 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6502 TYPE_VECTOR_SUBPARTS (idxtype
)));
6503 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6504 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6505 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6506 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6511 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6513 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6515 if (prev_stmt_info
== NULL
)
6516 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6518 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6519 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6524 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6526 gimple
*group_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6527 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt
))++;
6533 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
6535 /* We vectorize all the stmts of the interleaving group when we
6536 reach the last stmt in the group. */
6537 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
6538 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
6547 grouped_store
= false;
6548 /* VEC_NUM is the number of vect stmts to be created for this
6550 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6551 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6552 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
6553 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6554 op
= vect_get_store_rhs (first_stmt
);
6557 /* VEC_NUM is the number of vect stmts to be created for this
6559 vec_num
= group_size
;
6561 ref_type
= get_group_alias_ptr_type (first_stmt
);
6564 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6566 if (dump_enabled_p ())
6567 dump_printf_loc (MSG_NOTE
, vect_location
,
6568 "transform store. ncopies = %d\n", ncopies
);
6570 if (memory_access_type
== VMAT_ELEMENTWISE
6571 || memory_access_type
== VMAT_STRIDED_SLP
)
6573 gimple_stmt_iterator incr_gsi
;
6579 tree stride_base
, stride_step
, alias_off
;
6582 /* Checked by get_load_store_type. */
6583 unsigned int const_nunits
= nunits
.to_constant ();
6585 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6586 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6589 = fold_build_pointer_plus
6590 (DR_BASE_ADDRESS (first_dr
),
6591 size_binop (PLUS_EXPR
,
6592 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6593 convert_to_ptrofftype (DR_INIT (first_dr
))));
6594 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6596 /* For a store with loop-invariant (but other than power-of-2)
6597 stride (i.e. not a grouped access) like so:
6599 for (i = 0; i < n; i += stride)
6602 we generate a new induction variable and new stores from
6603 the components of the (vectorized) rhs:
6605 for (j = 0; ; j += VF*stride)
6610 array[j + stride] = tmp2;
6614 unsigned nstores
= const_nunits
;
6616 tree ltype
= elem_type
;
6617 tree lvectype
= vectype
;
6620 if (group_size
< const_nunits
6621 && const_nunits
% group_size
== 0)
6623 nstores
= const_nunits
/ group_size
;
6625 ltype
= build_vector_type (elem_type
, group_size
);
6628 /* First check if vec_extract optab doesn't support extraction
6629 of vector elts directly. */
6630 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6632 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6633 || !VECTOR_MODE_P (vmode
)
6634 || !targetm
.vector_mode_supported_p (vmode
)
6635 || (convert_optab_handler (vec_extract_optab
,
6636 TYPE_MODE (vectype
), vmode
)
6637 == CODE_FOR_nothing
))
6639 /* Try to avoid emitting an extract of vector elements
6640 by performing the extracts using an integer type of the
6641 same size, extracting from a vector of those and then
6642 re-interpreting it as the original vector type if
6645 = group_size
* GET_MODE_BITSIZE (elmode
);
6646 elmode
= int_mode_for_size (lsize
, 0).require ();
6647 unsigned int lnunits
= const_nunits
/ group_size
;
6648 /* If we can't construct such a vector fall back to
6649 element extracts from the original vector type and
6650 element size stores. */
6651 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6652 && VECTOR_MODE_P (vmode
)
6653 && targetm
.vector_mode_supported_p (vmode
)
6654 && (convert_optab_handler (vec_extract_optab
,
6656 != CODE_FOR_nothing
))
6660 ltype
= build_nonstandard_integer_type (lsize
, 1);
6661 lvectype
= build_vector_type (ltype
, nstores
);
6663 /* Else fall back to vector extraction anyway.
6664 Fewer stores are more important than avoiding spilling
6665 of the vector we extract from. Compared to the
6666 construction case in vectorizable_load no store-forwarding
6667 issue exists here for reasonable archs. */
6670 else if (group_size
>= const_nunits
6671 && group_size
% const_nunits
== 0)
6674 lnel
= const_nunits
;
6678 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6679 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6682 ivstep
= stride_step
;
6683 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6684 build_int_cst (TREE_TYPE (ivstep
), vf
));
6686 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6688 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6689 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6690 create_iv (stride_base
, ivstep
, NULL
,
6691 loop
, &incr_gsi
, insert_after
,
6693 incr
= gsi_stmt (incr_gsi
);
6694 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6696 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6698 prev_stmt_info
= NULL
;
6699 alias_off
= build_int_cst (ref_type
, 0);
6700 next_stmt
= first_stmt
;
6701 for (g
= 0; g
< group_size
; g
++)
6703 running_off
= offvar
;
6706 tree size
= TYPE_SIZE_UNIT (ltype
);
6707 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6709 tree newoff
= copy_ssa_name (running_off
, NULL
);
6710 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6712 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6713 running_off
= newoff
;
6715 unsigned int group_el
= 0;
6716 unsigned HOST_WIDE_INT
6717 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6718 for (j
= 0; j
< ncopies
; j
++)
6720 /* We've set op and dt above, from vect_get_store_rhs,
6721 and first_stmt == stmt. */
6726 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6728 vec_oprnd
= vec_oprnds
[0];
6732 op
= vect_get_store_rhs (next_stmt
);
6733 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6739 vec_oprnd
= vec_oprnds
[j
];
6742 vect_is_simple_use (op
, vinfo
, &rhs_dt
);
6743 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
,
6747 /* Pun the vector to extract from if necessary. */
6748 if (lvectype
!= vectype
)
6750 tree tem
= make_ssa_name (lvectype
);
6752 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6753 lvectype
, vec_oprnd
));
6754 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6757 for (i
= 0; i
< nstores
; i
++)
6759 tree newref
, newoff
;
6760 gimple
*incr
, *assign
;
6761 tree size
= TYPE_SIZE (ltype
);
6762 /* Extract the i'th component. */
6763 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6764 bitsize_int (i
), size
);
6765 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6768 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6772 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6774 newref
= build2 (MEM_REF
, ltype
,
6775 running_off
, this_off
);
6776 vect_copy_ref_info (newref
, DR_REF (first_dr
));
6778 /* And store it to *running_off. */
6779 assign
= gimple_build_assign (newref
, elem
);
6780 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6784 || group_el
== group_size
)
6786 newoff
= copy_ssa_name (running_off
, NULL
);
6787 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6788 running_off
, stride_step
);
6789 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6791 running_off
= newoff
;
6794 if (g
== group_size
- 1
6797 if (j
== 0 && i
== 0)
6798 STMT_VINFO_VEC_STMT (stmt_info
)
6799 = *vec_stmt
= assign
;
6801 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6802 prev_stmt_info
= vinfo_for_stmt (assign
);
6806 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6811 vec_oprnds
.release ();
6815 auto_vec
<tree
> dr_chain (group_size
);
6816 oprnds
.create (group_size
);
6818 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6819 gcc_assert (alignment_support_scheme
);
6820 vec_loop_masks
*loop_masks
6821 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6822 ? &LOOP_VINFO_MASKS (loop_vinfo
)
6824 /* Targets with store-lane instructions must not require explicit
6825 realignment. vect_supportable_dr_alignment always returns either
6826 dr_aligned or dr_unaligned_supported for masked operations. */
6827 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6830 || alignment_support_scheme
== dr_aligned
6831 || alignment_support_scheme
== dr_unaligned_supported
);
6833 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6834 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6835 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6838 tree vec_offset
= NULL_TREE
;
6839 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6841 aggr_type
= NULL_TREE
;
6844 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
6846 aggr_type
= elem_type
;
6847 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
6848 &bump
, &vec_offset
);
6852 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6853 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6855 aggr_type
= vectype
;
6856 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
6860 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6862 /* In case the vectorization factor (VF) is bigger than the number
6863 of elements that we can fit in a vectype (nunits), we have to generate
6864 more than one vector stmt - i.e - we need to "unroll" the
6865 vector stmt by a factor VF/nunits. For more details see documentation in
6866 vect_get_vec_def_for_copy_stmt. */
6868 /* In case of interleaving (non-unit grouped access):
6875 We create vectorized stores starting from base address (the access of the
6876 first stmt in the chain (S2 in the above example), when the last store stmt
6877 of the chain (S4) is reached:
6880 VS2: &base + vec_size*1 = vx0
6881 VS3: &base + vec_size*2 = vx1
6882 VS4: &base + vec_size*3 = vx3
6884 Then permutation statements are generated:
6886 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6887 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6890 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6891 (the order of the data-refs in the output of vect_permute_store_chain
6892 corresponds to the order of scalar stmts in the interleaving chain - see
6893 the documentation of vect_permute_store_chain()).
6895 In case of both multiple types and interleaving, above vector stores and
6896 permutation stmts are created for every copy. The result vector stmts are
6897 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6898 STMT_VINFO_RELATED_STMT for the next copies.
6901 prev_stmt_info
= NULL
;
6902 tree vec_mask
= NULL_TREE
;
6903 for (j
= 0; j
< ncopies
; j
++)
6910 /* Get vectorized arguments for SLP_NODE. */
6911 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6914 vec_oprnd
= vec_oprnds
[0];
6918 /* For interleaved stores we collect vectorized defs for all the
6919 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6920 used as an input to vect_permute_store_chain(), and OPRNDS as
6921 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6923 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6924 OPRNDS are of size 1. */
6925 next_stmt
= first_stmt
;
6926 for (i
= 0; i
< group_size
; i
++)
6928 /* Since gaps are not supported for interleaved stores,
6929 DR_GROUP_SIZE is the exact number of stmts in the chain.
6930 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6931 there is no interleaving, DR_GROUP_SIZE is 1, and only one
6932 iteration of the loop will be executed. */
6933 op
= vect_get_store_rhs (next_stmt
);
6934 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6935 dr_chain
.quick_push (vec_oprnd
);
6936 oprnds
.quick_push (vec_oprnd
);
6937 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6940 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
6944 /* We should have caught mismatched types earlier. */
6945 gcc_assert (useless_type_conversion_p (vectype
,
6946 TREE_TYPE (vec_oprnd
)));
6947 bool simd_lane_access_p
6948 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6949 if (simd_lane_access_p
6950 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6951 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6952 && integer_zerop (DR_OFFSET (first_dr
))
6953 && integer_zerop (DR_INIT (first_dr
))
6954 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6955 get_alias_set (TREE_TYPE (ref_type
))))
6957 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6958 dataref_offset
= build_int_cst (ref_type
, 0);
6961 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6963 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
6964 &dataref_ptr
, &vec_offset
);
6969 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
6970 simd_lane_access_p
? loop
: NULL
,
6971 offset
, &dummy
, gsi
, &ptr_incr
,
6972 simd_lane_access_p
, &inv_p
,
6974 gcc_assert (bb_vinfo
|| !inv_p
);
6978 /* For interleaved stores we created vectorized defs for all the
6979 defs stored in OPRNDS in the previous iteration (previous copy).
6980 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6981 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6983 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6984 OPRNDS are of size 1. */
6985 for (i
= 0; i
< group_size
; i
++)
6988 vect_is_simple_use (op
, vinfo
, &rhs_dt
);
6989 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
, op
);
6990 dr_chain
[i
] = vec_oprnd
;
6991 oprnds
[i
] = vec_oprnd
;
6994 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
6997 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6998 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6999 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
7002 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7006 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7010 /* Get an array into which we can store the individual vectors. */
7011 vec_array
= create_vector_array (vectype
, vec_num
);
7013 /* Invalidate the current contents of VEC_ARRAY. This should
7014 become an RTL clobber too, which prevents the vector registers
7015 from being upward-exposed. */
7016 vect_clobber_variable (stmt
, gsi
, vec_array
);
7018 /* Store the individual vectors into the array. */
7019 for (i
= 0; i
< vec_num
; i
++)
7021 vec_oprnd
= dr_chain
[i
];
7022 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
7025 tree final_mask
= NULL
;
7027 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7030 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7037 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7039 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7040 tree alias_ptr
= build_int_cst (ref_type
, align
);
7041 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7042 dataref_ptr
, alias_ptr
,
7043 final_mask
, vec_array
);
7048 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7049 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7050 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7052 gimple_call_set_lhs (call
, data_ref
);
7054 gimple_call_set_nothrow (call
, true);
7056 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7058 /* Record that VEC_ARRAY is now dead. */
7059 vect_clobber_variable (stmt
, gsi
, vec_array
);
7067 result_chain
.create (group_size
);
7069 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
7073 next_stmt
= first_stmt
;
7074 for (i
= 0; i
< vec_num
; i
++)
7076 unsigned align
, misalign
;
7078 tree final_mask
= NULL_TREE
;
7080 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7082 vectype
, vec_num
* j
+ i
);
7084 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7087 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7089 tree scale
= size_int (gs_info
.scale
);
7092 call
= gimple_build_call_internal
7093 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7094 scale
, vec_oprnd
, final_mask
);
7096 call
= gimple_build_call_internal
7097 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7099 gimple_call_set_nothrow (call
, true);
7101 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7106 /* Bump the vector pointer. */
7107 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7111 vec_oprnd
= vec_oprnds
[i
];
7112 else if (grouped_store
)
7113 /* For grouped stores vectorized defs are interleaved in
7114 vect_permute_store_chain(). */
7115 vec_oprnd
= result_chain
[i
];
7117 align
= DR_TARGET_ALIGNMENT (first_dr
);
7118 if (aligned_access_p (first_dr
))
7120 else if (DR_MISALIGNMENT (first_dr
) == -1)
7122 align
= dr_alignment (vect_dr_behavior (first_dr
));
7126 misalign
= DR_MISALIGNMENT (first_dr
);
7127 if (dataref_offset
== NULL_TREE
7128 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7129 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7132 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7134 tree perm_mask
= perm_mask_for_reverse (vectype
);
7136 = vect_create_destination_var (vect_get_store_rhs (stmt
),
7138 tree new_temp
= make_ssa_name (perm_dest
);
7140 /* Generate the permute statement. */
7142 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7143 vec_oprnd
, perm_mask
);
7144 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
7146 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7147 vec_oprnd
= new_temp
;
7150 /* Arguments are ready. Create the new vector stmt. */
7153 align
= least_bit_hwi (misalign
| align
);
7154 tree ptr
= build_int_cst (ref_type
, align
);
7156 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7158 final_mask
, vec_oprnd
);
7159 gimple_call_set_nothrow (call
, true);
7164 data_ref
= fold_build2 (MEM_REF
, vectype
,
7168 : build_int_cst (ref_type
, 0));
7169 if (aligned_access_p (first_dr
))
7171 else if (DR_MISALIGNMENT (first_dr
) == -1)
7172 TREE_TYPE (data_ref
)
7173 = build_aligned_type (TREE_TYPE (data_ref
),
7174 align
* BITS_PER_UNIT
);
7176 TREE_TYPE (data_ref
)
7177 = build_aligned_type (TREE_TYPE (data_ref
),
7178 TYPE_ALIGN (elem_type
));
7179 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7180 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
7182 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7187 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
7195 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7197 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7198 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7203 result_chain
.release ();
7204 vec_oprnds
.release ();
7209 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7210 VECTOR_CST mask. No checks are made that the target platform supports the
7211 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7212 vect_gen_perm_mask_checked. */
7215 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
7219 poly_uint64 nunits
= sel
.length ();
7220 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
7222 mask_type
= build_vector_type (ssizetype
, nunits
);
7223 return vec_perm_indices_to_tree (mask_type
, sel
);
7226 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7227 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7230 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
7232 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
7233 return vect_gen_perm_mask_any (vectype
, sel
);
7236 /* Given a vector variable X and Y, that was generated for the scalar
7237 STMT, generate instructions to permute the vector elements of X and Y
7238 using permutation mask MASK_VEC, insert them at *GSI and return the
7239 permuted vector variable. */
7242 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
7243 gimple_stmt_iterator
*gsi
)
7245 tree vectype
= TREE_TYPE (x
);
7246 tree perm_dest
, data_ref
;
7249 tree scalar_dest
= gimple_get_lhs (stmt
);
7250 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
7251 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7253 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
7254 data_ref
= make_ssa_name (perm_dest
);
7256 /* Generate the permute statement. */
7257 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
7258 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
7263 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7264 inserting them on the loop's preheader edge. Returns true if we
7265 were successful in doing so (and thus STMT can be moved then),
7266 otherwise returns false. */
7269 hoist_defs_of_uses (gimple
*stmt
, struct loop
*loop
)
7275 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
7277 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7278 if (!gimple_nop_p (def_stmt
)
7279 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7281 /* Make sure we don't need to recurse. While we could do
7282 so in simple cases when there are more complex use webs
7283 we don't have an easy way to preserve stmt order to fulfil
7284 dependencies within them. */
7287 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
7289 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
7291 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
7292 if (!gimple_nop_p (def_stmt2
)
7293 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
7303 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
7305 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7306 if (!gimple_nop_p (def_stmt
)
7307 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7309 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7310 gsi_remove (&gsi
, false);
7311 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
7318 /* vectorizable_load.
7320 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
7322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7323 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7327 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
7328 slp_tree slp_node
, slp_instance slp_node_instance
,
7329 stmt_vector_for_cost
*cost_vec
)
7332 tree vec_dest
= NULL
;
7333 tree data_ref
= NULL
;
7334 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7335 stmt_vec_info prev_stmt_info
;
7336 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7337 struct loop
*loop
= NULL
;
7338 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
7339 bool nested_in_vect_loop
= false;
7340 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
7344 gimple
*new_stmt
= NULL
;
7346 enum dr_alignment_support alignment_support_scheme
;
7347 tree dataref_ptr
= NULL_TREE
;
7348 tree dataref_offset
= NULL_TREE
;
7349 gimple
*ptr_incr
= NULL
;
7352 unsigned int group_size
;
7353 poly_uint64 group_gap_adj
;
7354 tree msq
= NULL_TREE
, lsq
;
7355 tree offset
= NULL_TREE
;
7356 tree byte_offset
= NULL_TREE
;
7357 tree realignment_token
= NULL_TREE
;
7359 vec
<tree
> dr_chain
= vNULL
;
7360 bool grouped_load
= false;
7362 gimple
*first_stmt_for_drptr
= NULL
;
7364 bool compute_in_loop
= false;
7365 struct loop
*at_loop
;
7367 bool slp
= (slp_node
!= NULL
);
7368 bool slp_perm
= false;
7369 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7372 gather_scatter_info gs_info
;
7373 vec_info
*vinfo
= stmt_info
->vinfo
;
7375 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7377 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7380 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7384 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7385 if (is_gimple_assign (stmt
))
7387 scalar_dest
= gimple_assign_lhs (stmt
);
7388 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7391 tree_code code
= gimple_assign_rhs_code (stmt
);
7392 if (code
!= ARRAY_REF
7393 && code
!= BIT_FIELD_REF
7394 && code
!= INDIRECT_REF
7395 && code
!= COMPONENT_REF
7396 && code
!= IMAGPART_EXPR
7397 && code
!= REALPART_EXPR
7399 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7404 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
7405 if (!call
|| !gimple_call_internal_p (call
))
7408 internal_fn ifn
= gimple_call_internal_fn (call
);
7409 if (!internal_load_fn_p (ifn
))
7412 scalar_dest
= gimple_call_lhs (call
);
7416 if (slp_node
!= NULL
)
7418 if (dump_enabled_p ())
7419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7420 "SLP of masked loads not supported.\n");
7424 int mask_index
= internal_fn_mask_index (ifn
);
7425 if (mask_index
>= 0)
7427 mask
= gimple_call_arg (call
, mask_index
);
7428 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
7434 if (!STMT_VINFO_DATA_REF (stmt_info
))
7437 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7438 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7442 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7443 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
7444 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7449 /* Multiple types in SLP are handled by creating the appropriate number of
7450 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7455 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7457 gcc_assert (ncopies
>= 1);
7459 /* FORNOW. This restriction should be relaxed. */
7460 if (nested_in_vect_loop
&& ncopies
> 1)
7462 if (dump_enabled_p ())
7463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7464 "multiple types in nested loop.\n");
7468 /* Invalidate assumptions made by dependence analysis when vectorization
7469 on the unrolled body effectively re-orders stmts. */
7471 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7472 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7473 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7475 if (dump_enabled_p ())
7476 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7477 "cannot perform implicit CSE when unrolling "
7478 "with negative dependence distance\n");
7482 elem_type
= TREE_TYPE (vectype
);
7483 mode
= TYPE_MODE (vectype
);
7485 /* FORNOW. In some cases can vectorize even if data-type not supported
7486 (e.g. - data copies). */
7487 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7489 if (dump_enabled_p ())
7490 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7491 "Aligned load, but unsupported type.\n");
7495 /* Check if the load is a part of an interleaving chain. */
7496 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7498 grouped_load
= true;
7500 gcc_assert (!nested_in_vect_loop
);
7501 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7503 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7504 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7506 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7509 /* Invalidate assumptions made by dependence analysis when vectorization
7510 on the unrolled body effectively re-orders stmts. */
7511 if (!PURE_SLP_STMT (stmt_info
)
7512 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7513 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7514 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7516 if (dump_enabled_p ())
7517 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7518 "cannot perform implicit CSE when performing "
7519 "group loads with negative dependence distance\n");
7523 /* Similarly when the stmt is a load that is both part of a SLP
7524 instance and a loop vectorized stmt via the same-dr mechanism
7525 we have to give up. */
7526 if (DR_GROUP_SAME_DR_STMT (stmt_info
)
7527 && (STMT_SLP_TYPE (stmt_info
)
7528 != STMT_SLP_TYPE (vinfo_for_stmt
7529 (DR_GROUP_SAME_DR_STMT (stmt_info
)))))
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7533 "conflicting SLP types for CSEd load\n");
7540 vect_memory_access_type memory_access_type
;
7541 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7542 &memory_access_type
, &gs_info
))
7547 if (memory_access_type
== VMAT_CONTIGUOUS
)
7549 machine_mode vec_mode
= TYPE_MODE (vectype
);
7550 if (!VECTOR_MODE_P (vec_mode
)
7551 || !can_vec_mask_load_store_p (vec_mode
,
7552 TYPE_MODE (mask_vectype
), true))
7555 else if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7557 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7559 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7560 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7562 if (dump_enabled_p ())
7563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7564 "masked gather with integer mask not"
7569 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7570 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7574 "unsupported access type for masked load.\n");
7579 if (!vec_stmt
) /* transformation not required. */
7582 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7585 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7586 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7587 memory_access_type
, &gs_info
);
7589 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7590 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7591 slp_node_instance
, slp_node
, cost_vec
);
7596 gcc_assert (memory_access_type
7597 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7599 if (dump_enabled_p ())
7600 dump_printf_loc (MSG_NOTE
, vect_location
,
7601 "transform load. ncopies = %d\n", ncopies
);
7605 ensure_base_align (dr
);
7607 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7609 vect_build_gather_load_calls (stmt
, gsi
, vec_stmt
, &gs_info
, mask
,
7614 if (memory_access_type
== VMAT_ELEMENTWISE
7615 || memory_access_type
== VMAT_STRIDED_SLP
)
7617 gimple_stmt_iterator incr_gsi
;
7623 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7624 tree stride_base
, stride_step
, alias_off
;
7625 /* Checked by get_load_store_type. */
7626 unsigned int const_nunits
= nunits
.to_constant ();
7627 unsigned HOST_WIDE_INT cst_offset
= 0;
7629 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7630 gcc_assert (!nested_in_vect_loop
);
7634 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7635 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7642 if (slp
&& grouped_load
)
7644 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7645 ref_type
= get_group_alias_ptr_type (first_stmt
);
7651 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7652 * vect_get_place_in_interleaving_chain (stmt
, first_stmt
));
7654 ref_type
= reference_alias_ptr_type (DR_REF (dr
));
7658 = fold_build_pointer_plus
7659 (DR_BASE_ADDRESS (first_dr
),
7660 size_binop (PLUS_EXPR
,
7661 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7662 convert_to_ptrofftype (DR_INIT (first_dr
))));
7663 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
7665 /* For a load with loop-invariant (but other than power-of-2)
7666 stride (i.e. not a grouped access) like so:
7668 for (i = 0; i < n; i += stride)
7671 we generate a new induction variable and new accesses to
7672 form a new vector (or vectors, depending on ncopies):
7674 for (j = 0; ; j += VF*stride)
7676 tmp2 = array[j + stride];
7678 vectemp = {tmp1, tmp2, ...}
7681 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7682 build_int_cst (TREE_TYPE (stride_step
), vf
));
7684 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7686 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7687 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7688 create_iv (stride_base
, ivstep
, NULL
,
7689 loop
, &incr_gsi
, insert_after
,
7691 incr
= gsi_stmt (incr_gsi
);
7692 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7694 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7696 prev_stmt_info
= NULL
;
7697 running_off
= offvar
;
7698 alias_off
= build_int_cst (ref_type
, 0);
7699 int nloads
= const_nunits
;
7701 tree ltype
= TREE_TYPE (vectype
);
7702 tree lvectype
= vectype
;
7703 auto_vec
<tree
> dr_chain
;
7704 if (memory_access_type
== VMAT_STRIDED_SLP
)
7706 if (group_size
< const_nunits
)
7708 /* First check if vec_init optab supports construction from
7709 vector elts directly. */
7710 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7712 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7713 && VECTOR_MODE_P (vmode
)
7714 && targetm
.vector_mode_supported_p (vmode
)
7715 && (convert_optab_handler (vec_init_optab
,
7716 TYPE_MODE (vectype
), vmode
)
7717 != CODE_FOR_nothing
))
7719 nloads
= const_nunits
/ group_size
;
7721 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7725 /* Otherwise avoid emitting a constructor of vector elements
7726 by performing the loads using an integer type of the same
7727 size, constructing a vector of those and then
7728 re-interpreting it as the original vector type.
7729 This avoids a huge runtime penalty due to the general
7730 inability to perform store forwarding from smaller stores
7731 to a larger load. */
7733 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7734 elmode
= int_mode_for_size (lsize
, 0).require ();
7735 unsigned int lnunits
= const_nunits
/ group_size
;
7736 /* If we can't construct such a vector fall back to
7737 element loads of the original vector type. */
7738 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7739 && VECTOR_MODE_P (vmode
)
7740 && targetm
.vector_mode_supported_p (vmode
)
7741 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7742 != CODE_FOR_nothing
))
7746 ltype
= build_nonstandard_integer_type (lsize
, 1);
7747 lvectype
= build_vector_type (ltype
, nloads
);
7754 lnel
= const_nunits
;
7757 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7759 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7760 else if (nloads
== 1)
7765 /* For SLP permutation support we need to load the whole group,
7766 not only the number of vector stmts the permutation result
7770 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7772 unsigned int const_vf
= vf
.to_constant ();
7773 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7774 dr_chain
.create (ncopies
);
7777 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7779 unsigned int group_el
= 0;
7780 unsigned HOST_WIDE_INT
7781 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7782 for (j
= 0; j
< ncopies
; j
++)
7785 vec_alloc (v
, nloads
);
7786 for (i
= 0; i
< nloads
; i
++)
7788 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7789 group_el
* elsz
+ cst_offset
);
7790 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
7791 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7792 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
7793 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7795 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7796 gimple_assign_lhs (new_stmt
));
7800 || group_el
== group_size
)
7802 tree newoff
= copy_ssa_name (running_off
);
7803 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7804 running_off
, stride_step
);
7805 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7807 running_off
= newoff
;
7813 tree vec_inv
= build_constructor (lvectype
, v
);
7814 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7815 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7816 if (lvectype
!= vectype
)
7818 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7820 build1 (VIEW_CONVERT_EXPR
,
7821 vectype
, new_temp
));
7822 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7829 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7831 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7836 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7838 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7839 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7845 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7846 slp_node_instance
, false, &n_perms
);
7851 if (memory_access_type
== VMAT_GATHER_SCATTER
7852 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
7853 grouped_load
= false;
7857 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7858 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7859 /* For SLP vectorization we directly vectorize a subchain
7860 without permutation. */
7861 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7862 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7863 /* For BB vectorization always use the first stmt to base
7864 the data ref pointer on. */
7866 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7868 /* Check if the chain of loads is already vectorized. */
7869 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7870 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7871 ??? But we can only do so if there is exactly one
7872 as we have no way to get at the rest. Leave the CSE
7874 ??? With the group load eventually participating
7875 in multiple different permutations (having multiple
7876 slp nodes which refer to the same group) the CSE
7877 is even wrong code. See PR56270. */
7880 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7883 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7886 /* VEC_NUM is the number of vect stmts to be created for this group. */
7889 grouped_load
= false;
7890 /* For SLP permutation support we need to load the whole group,
7891 not only the number of vector stmts the permutation result
7895 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7897 unsigned int const_vf
= vf
.to_constant ();
7898 unsigned int const_nunits
= nunits
.to_constant ();
7899 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7900 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7904 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7906 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7910 vec_num
= group_size
;
7912 ref_type
= get_group_alias_ptr_type (first_stmt
);
7918 group_size
= vec_num
= 1;
7920 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7923 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7924 gcc_assert (alignment_support_scheme
);
7925 vec_loop_masks
*loop_masks
7926 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7927 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7929 /* Targets with store-lane instructions must not require explicit
7930 realignment. vect_supportable_dr_alignment always returns either
7931 dr_aligned or dr_unaligned_supported for masked operations. */
7932 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7935 || alignment_support_scheme
== dr_aligned
7936 || alignment_support_scheme
== dr_unaligned_supported
);
7938 /* In case the vectorization factor (VF) is bigger than the number
7939 of elements that we can fit in a vectype (nunits), we have to generate
7940 more than one vector stmt - i.e - we need to "unroll" the
7941 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7942 from one copy of the vector stmt to the next, in the field
7943 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7944 stages to find the correct vector defs to be used when vectorizing
7945 stmts that use the defs of the current stmt. The example below
7946 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7947 need to create 4 vectorized stmts):
7949 before vectorization:
7950 RELATED_STMT VEC_STMT
7954 step 1: vectorize stmt S1:
7955 We first create the vector stmt VS1_0, and, as usual, record a
7956 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7957 Next, we create the vector stmt VS1_1, and record a pointer to
7958 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7959 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7961 RELATED_STMT VEC_STMT
7962 VS1_0: vx0 = memref0 VS1_1 -
7963 VS1_1: vx1 = memref1 VS1_2 -
7964 VS1_2: vx2 = memref2 VS1_3 -
7965 VS1_3: vx3 = memref3 - -
7966 S1: x = load - VS1_0
7969 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7970 information we recorded in RELATED_STMT field is used to vectorize
7973 /* In case of interleaving (non-unit grouped access):
7980 Vectorized loads are created in the order of memory accesses
7981 starting from the access of the first stmt of the chain:
7984 VS2: vx1 = &base + vec_size*1
7985 VS3: vx3 = &base + vec_size*2
7986 VS4: vx4 = &base + vec_size*3
7988 Then permutation statements are generated:
7990 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7991 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7994 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7995 (the order of the data-refs in the output of vect_permute_load_chain
7996 corresponds to the order of scalar stmts in the interleaving chain - see
7997 the documentation of vect_permute_load_chain()).
7998 The generation of permutation stmts and recording them in
7999 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8001 In case of both multiple types and interleaving, the vector loads and
8002 permutation stmts above are created for every copy. The result vector
8003 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8004 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8006 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8007 on a target that supports unaligned accesses (dr_unaligned_supported)
8008 we generate the following code:
8012 p = p + indx * vectype_size;
8017 Otherwise, the data reference is potentially unaligned on a target that
8018 does not support unaligned accesses (dr_explicit_realign_optimized) -
8019 then generate the following code, in which the data in each iteration is
8020 obtained by two vector loads, one from the previous iteration, and one
8021 from the current iteration:
8023 msq_init = *(floor(p1))
8024 p2 = initial_addr + VS - 1;
8025 realignment_token = call target_builtin;
8028 p2 = p2 + indx * vectype_size
8030 vec_dest = realign_load (msq, lsq, realignment_token)
8035 /* If the misalignment remains the same throughout the execution of the
8036 loop, we can create the init_addr and permutation mask at the loop
8037 preheader. Otherwise, it needs to be created inside the loop.
8038 This can only occur when vectorizing memory accesses in the inner-loop
8039 nested within an outer-loop that is being vectorized. */
8041 if (nested_in_vect_loop
8042 && !multiple_p (DR_STEP_ALIGNMENT (dr
),
8043 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8045 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8046 compute_in_loop
= true;
8049 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8050 || alignment_support_scheme
== dr_explicit_realign
)
8051 && !compute_in_loop
)
8053 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
8054 alignment_support_scheme
, NULL_TREE
,
8056 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8058 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8059 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8066 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8067 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8070 tree vec_offset
= NULL_TREE
;
8071 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8073 aggr_type
= NULL_TREE
;
8076 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8078 aggr_type
= elem_type
;
8079 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
8080 &bump
, &vec_offset
);
8084 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8085 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8087 aggr_type
= vectype
;
8088 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
8091 tree vec_mask
= NULL_TREE
;
8092 prev_stmt_info
= NULL
;
8093 poly_uint64 group_elt
= 0;
8094 for (j
= 0; j
< ncopies
; j
++)
8096 /* 1. Create the vector or array pointer update chain. */
8099 bool simd_lane_access_p
8100 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8101 if (simd_lane_access_p
8102 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
8103 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
8104 && integer_zerop (DR_OFFSET (first_dr
))
8105 && integer_zerop (DR_INIT (first_dr
))
8106 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8107 get_alias_set (TREE_TYPE (ref_type
)))
8108 && (alignment_support_scheme
== dr_aligned
8109 || alignment_support_scheme
== dr_unaligned_supported
))
8111 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
8112 dataref_offset
= build_int_cst (ref_type
, 0);
8115 else if (first_stmt_for_drptr
8116 && first_stmt
!= first_stmt_for_drptr
)
8119 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
8120 at_loop
, offset
, &dummy
, gsi
,
8121 &ptr_incr
, simd_lane_access_p
,
8122 &inv_p
, byte_offset
, bump
);
8123 /* Adjust the pointer by the difference to first_stmt. */
8124 data_reference_p ptrdr
8125 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
8126 tree diff
= fold_convert (sizetype
,
8127 size_binop (MINUS_EXPR
,
8130 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8133 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8135 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
8136 &dataref_ptr
, &vec_offset
);
8141 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
8142 offset
, &dummy
, gsi
, &ptr_incr
,
8143 simd_lane_access_p
, &inv_p
,
8146 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
8152 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8154 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8155 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
8158 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8161 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
8164 if (grouped_load
|| slp_perm
)
8165 dr_chain
.create (vec_num
);
8167 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8171 vec_array
= create_vector_array (vectype
, vec_num
);
8173 tree final_mask
= NULL_TREE
;
8175 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8178 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8185 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8187 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8188 tree alias_ptr
= build_int_cst (ref_type
, align
);
8189 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8190 dataref_ptr
, alias_ptr
,
8196 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8197 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8198 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8200 gimple_call_set_lhs (call
, vec_array
);
8201 gimple_call_set_nothrow (call
, true);
8203 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8205 /* Extract each vector into an SSA_NAME. */
8206 for (i
= 0; i
< vec_num
; i
++)
8208 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
8210 dr_chain
.quick_push (new_temp
);
8213 /* Record the mapping between SSA_NAMEs and statements. */
8214 vect_record_grouped_load_vectors (stmt
, dr_chain
);
8216 /* Record that VEC_ARRAY is now dead. */
8217 vect_clobber_variable (stmt
, gsi
, vec_array
);
8221 for (i
= 0; i
< vec_num
; i
++)
8223 tree final_mask
= NULL_TREE
;
8225 && memory_access_type
!= VMAT_INVARIANT
)
8226 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8228 vectype
, vec_num
* j
+ i
);
8230 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8234 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8237 /* 2. Create the vector-load in the loop. */
8238 switch (alignment_support_scheme
)
8241 case dr_unaligned_supported
:
8243 unsigned int align
, misalign
;
8245 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8247 tree scale
= size_int (gs_info
.scale
);
8250 call
= gimple_build_call_internal
8251 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8252 vec_offset
, scale
, final_mask
);
8254 call
= gimple_build_call_internal
8255 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8257 gimple_call_set_nothrow (call
, true);
8259 data_ref
= NULL_TREE
;
8263 align
= DR_TARGET_ALIGNMENT (dr
);
8264 if (alignment_support_scheme
== dr_aligned
)
8266 gcc_assert (aligned_access_p (first_dr
));
8269 else if (DR_MISALIGNMENT (first_dr
) == -1)
8271 align
= dr_alignment (vect_dr_behavior (first_dr
));
8275 misalign
= DR_MISALIGNMENT (first_dr
);
8276 if (dataref_offset
== NULL_TREE
8277 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8278 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8283 align
= least_bit_hwi (misalign
| align
);
8284 tree ptr
= build_int_cst (ref_type
, align
);
8286 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8289 gimple_call_set_nothrow (call
, true);
8291 data_ref
= NULL_TREE
;
8296 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8299 : build_int_cst (ref_type
, 0));
8300 if (alignment_support_scheme
== dr_aligned
)
8302 else if (DR_MISALIGNMENT (first_dr
) == -1)
8303 TREE_TYPE (data_ref
)
8304 = build_aligned_type (TREE_TYPE (data_ref
),
8305 align
* BITS_PER_UNIT
);
8307 TREE_TYPE (data_ref
)
8308 = build_aligned_type (TREE_TYPE (data_ref
),
8309 TYPE_ALIGN (elem_type
));
8313 case dr_explicit_realign
:
8317 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8319 if (compute_in_loop
)
8320 msq
= vect_setup_realignment (first_stmt
, gsi
,
8322 dr_explicit_realign
,
8325 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8326 ptr
= copy_ssa_name (dataref_ptr
);
8328 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8329 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8330 new_stmt
= gimple_build_assign
8331 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8333 (TREE_TYPE (dataref_ptr
),
8334 -(HOST_WIDE_INT
) align
));
8335 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8337 = build2 (MEM_REF
, vectype
, ptr
,
8338 build_int_cst (ref_type
, 0));
8339 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8340 vec_dest
= vect_create_destination_var (scalar_dest
,
8342 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8343 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8344 gimple_assign_set_lhs (new_stmt
, new_temp
);
8345 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
8346 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
8347 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8350 bump
= size_binop (MULT_EXPR
, vs
,
8351 TYPE_SIZE_UNIT (elem_type
));
8352 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8353 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
8354 new_stmt
= gimple_build_assign
8355 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8357 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8358 ptr
= copy_ssa_name (ptr
, new_stmt
);
8359 gimple_assign_set_lhs (new_stmt
, ptr
);
8360 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8362 = build2 (MEM_REF
, vectype
, ptr
,
8363 build_int_cst (ref_type
, 0));
8366 case dr_explicit_realign_optimized
:
8368 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8369 new_temp
= copy_ssa_name (dataref_ptr
);
8371 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8372 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8373 new_stmt
= gimple_build_assign
8374 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8375 build_int_cst (TREE_TYPE (dataref_ptr
),
8376 -(HOST_WIDE_INT
) align
));
8377 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8379 = build2 (MEM_REF
, vectype
, new_temp
,
8380 build_int_cst (ref_type
, 0));
8386 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8387 /* DATA_REF is null if we've already built the statement. */
8390 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8391 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8393 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8394 gimple_set_lhs (new_stmt
, new_temp
);
8395 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8397 /* 3. Handle explicit realignment if necessary/supported.
8399 vec_dest = realign_load (msq, lsq, realignment_token) */
8400 if (alignment_support_scheme
== dr_explicit_realign_optimized
8401 || alignment_support_scheme
== dr_explicit_realign
)
8403 lsq
= gimple_assign_lhs (new_stmt
);
8404 if (!realignment_token
)
8405 realignment_token
= dataref_ptr
;
8406 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8407 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8408 msq
, lsq
, realignment_token
);
8409 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8410 gimple_assign_set_lhs (new_stmt
, new_temp
);
8411 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8413 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8416 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8417 add_phi_arg (phi
, lsq
,
8418 loop_latch_edge (containing_loop
),
8424 /* 4. Handle invariant-load. */
8425 if (inv_p
&& !bb_vinfo
)
8427 gcc_assert (!grouped_load
);
8428 /* If we have versioned for aliasing or the loop doesn't
8429 have any data dependencies that would preclude this,
8430 then we are sure this is a loop invariant load and
8431 thus we can insert it on the preheader edge. */
8432 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8433 && !nested_in_vect_loop
8434 && hoist_defs_of_uses (stmt
, loop
))
8436 if (dump_enabled_p ())
8438 dump_printf_loc (MSG_NOTE
, vect_location
,
8439 "hoisting out of the vectorized "
8441 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8443 tree tem
= copy_ssa_name (scalar_dest
);
8444 gsi_insert_on_edge_immediate
8445 (loop_preheader_edge (loop
),
8446 gimple_build_assign (tem
,
8448 (gimple_assign_rhs1 (stmt
))));
8449 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
8450 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8451 set_vinfo_for_stmt (new_stmt
,
8452 new_stmt_vec_info (new_stmt
, vinfo
));
8456 gimple_stmt_iterator gsi2
= *gsi
;
8458 new_temp
= vect_init_vector (stmt
, scalar_dest
,
8460 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8464 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8466 tree perm_mask
= perm_mask_for_reverse (vectype
);
8467 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8468 perm_mask
, stmt
, gsi
);
8469 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8472 /* Collect vector loads and later create their permutation in
8473 vect_transform_grouped_load (). */
8474 if (grouped_load
|| slp_perm
)
8475 dr_chain
.quick_push (new_temp
);
8477 /* Store vector loads in the corresponding SLP_NODE. */
8478 if (slp
&& !slp_perm
)
8479 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8481 /* With SLP permutation we load the gaps as well, without
8482 we need to skip the gaps after we manage to fully load
8483 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8484 group_elt
+= nunits
;
8485 if (maybe_ne (group_gap_adj
, 0U)
8487 && known_eq (group_elt
, group_size
- group_gap_adj
))
8489 poly_wide_int bump_val
8490 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8492 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8493 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8498 /* Bump the vector pointer to account for a gap or for excess
8499 elements loaded for a permuted SLP load. */
8500 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8502 poly_wide_int bump_val
8503 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8505 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8506 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8511 if (slp
&& !slp_perm
)
8517 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8518 slp_node_instance
, false,
8521 dr_chain
.release ();
8529 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8530 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
8531 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8536 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8538 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8539 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8542 dr_chain
.release ();
8548 /* Function vect_is_simple_cond.
8551 LOOP - the loop that is being vectorized.
8552 COND - Condition that is checked for simple use.
8555 *COMP_VECTYPE - the vector type for the comparison.
8556 *DTS - The def types for the arguments of the comparison
8558 Returns whether a COND can be vectorized. Checks whether
8559 condition operands are supportable using vec_is_simple_use. */
8562 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8563 tree
*comp_vectype
, enum vect_def_type
*dts
,
8567 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8570 if (TREE_CODE (cond
) == SSA_NAME
8571 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8573 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
8575 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8580 if (!COMPARISON_CLASS_P (cond
))
8583 lhs
= TREE_OPERAND (cond
, 0);
8584 rhs
= TREE_OPERAND (cond
, 1);
8586 if (TREE_CODE (lhs
) == SSA_NAME
)
8588 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
8591 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8592 || TREE_CODE (lhs
) == FIXED_CST
)
8593 dts
[0] = vect_constant_def
;
8597 if (TREE_CODE (rhs
) == SSA_NAME
)
8599 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
8602 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8603 || TREE_CODE (rhs
) == FIXED_CST
)
8604 dts
[1] = vect_constant_def
;
8608 if (vectype1
&& vectype2
8609 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8610 TYPE_VECTOR_SUBPARTS (vectype2
)))
8613 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8614 /* Invariant comparison. */
8615 if (! *comp_vectype
&& vectype
)
8617 tree scalar_type
= TREE_TYPE (lhs
);
8618 /* If we can widen the comparison to match vectype do so. */
8619 if (INTEGRAL_TYPE_P (scalar_type
)
8620 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8621 TYPE_SIZE (TREE_TYPE (vectype
))))
8622 scalar_type
= build_nonstandard_integer_type
8623 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8624 TYPE_UNSIGNED (scalar_type
));
8625 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8631 /* vectorizable_condition.
8633 Check if STMT is conditional modify expression that can be vectorized.
8634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8635 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8638 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8639 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8640 else clause if it is 2).
8642 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8645 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8646 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
8647 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
8649 tree scalar_dest
= NULL_TREE
;
8650 tree vec_dest
= NULL_TREE
;
8651 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8652 tree then_clause
, else_clause
;
8653 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8654 tree comp_vectype
= NULL_TREE
;
8655 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8656 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8659 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8660 enum vect_def_type dts
[4]
8661 = {vect_unknown_def_type
, vect_unknown_def_type
,
8662 vect_unknown_def_type
, vect_unknown_def_type
};
8665 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8666 stmt_vec_info prev_stmt_info
= NULL
;
8668 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8669 vec
<tree
> vec_oprnds0
= vNULL
;
8670 vec
<tree
> vec_oprnds1
= vNULL
;
8671 vec
<tree
> vec_oprnds2
= vNULL
;
8672 vec
<tree
> vec_oprnds3
= vNULL
;
8674 bool masked
= false;
8676 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8679 vect_reduction_type reduction_type
8680 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8681 if (reduction_type
== TREE_CODE_REDUCTION
)
8683 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8686 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8687 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8691 /* FORNOW: not yet supported. */
8692 if (STMT_VINFO_LIVE_P (stmt_info
))
8694 if (dump_enabled_p ())
8695 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8696 "value used after loop.\n");
8701 /* Is vectorizable conditional operation? */
8702 if (!is_gimple_assign (stmt
))
8705 code
= gimple_assign_rhs_code (stmt
);
8707 if (code
!= COND_EXPR
)
8710 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8711 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8716 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8718 gcc_assert (ncopies
>= 1);
8719 if (reduc_index
&& ncopies
> 1)
8720 return false; /* FORNOW */
8722 cond_expr
= gimple_assign_rhs1 (stmt
);
8723 then_clause
= gimple_assign_rhs2 (stmt
);
8724 else_clause
= gimple_assign_rhs3 (stmt
);
8726 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8727 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
8731 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
8733 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
8736 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8739 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8742 masked
= !COMPARISON_CLASS_P (cond_expr
);
8743 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8745 if (vec_cmp_type
== NULL_TREE
)
8748 cond_code
= TREE_CODE (cond_expr
);
8751 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8752 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8755 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8757 /* Boolean values may have another representation in vectors
8758 and therefore we prefer bit operations over comparison for
8759 them (which also works for scalar masks). We store opcodes
8760 to use in bitop1 and bitop2. Statement is vectorized as
8761 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8762 depending on bitop1 and bitop2 arity. */
8766 bitop1
= BIT_NOT_EXPR
;
8767 bitop2
= BIT_AND_EXPR
;
8770 bitop1
= BIT_NOT_EXPR
;
8771 bitop2
= BIT_IOR_EXPR
;
8774 bitop1
= BIT_NOT_EXPR
;
8775 bitop2
= BIT_AND_EXPR
;
8776 std::swap (cond_expr0
, cond_expr1
);
8779 bitop1
= BIT_NOT_EXPR
;
8780 bitop2
= BIT_IOR_EXPR
;
8781 std::swap (cond_expr0
, cond_expr1
);
8784 bitop1
= BIT_XOR_EXPR
;
8787 bitop1
= BIT_XOR_EXPR
;
8788 bitop2
= BIT_NOT_EXPR
;
8793 cond_code
= SSA_NAME
;
8798 if (bitop1
!= NOP_EXPR
)
8800 machine_mode mode
= TYPE_MODE (comp_vectype
);
8803 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8804 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8807 if (bitop2
!= NOP_EXPR
)
8809 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8811 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8815 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8818 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8819 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
8830 vec_oprnds0
.create (1);
8831 vec_oprnds1
.create (1);
8832 vec_oprnds2
.create (1);
8833 vec_oprnds3
.create (1);
8837 scalar_dest
= gimple_assign_lhs (stmt
);
8838 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8839 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8841 /* Handle cond expr. */
8842 for (j
= 0; j
< ncopies
; j
++)
8844 gimple
*new_stmt
= NULL
;
8849 auto_vec
<tree
, 4> ops
;
8850 auto_vec
<vec
<tree
>, 4> vec_defs
;
8853 ops
.safe_push (cond_expr
);
8856 ops
.safe_push (cond_expr0
);
8857 ops
.safe_push (cond_expr1
);
8859 ops
.safe_push (then_clause
);
8860 ops
.safe_push (else_clause
);
8861 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8862 vec_oprnds3
= vec_defs
.pop ();
8863 vec_oprnds2
= vec_defs
.pop ();
8865 vec_oprnds1
= vec_defs
.pop ();
8866 vec_oprnds0
= vec_defs
.pop ();
8873 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8875 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
, &dts
[0]);
8880 = vect_get_vec_def_for_operand (cond_expr0
,
8881 stmt
, comp_vectype
);
8882 vect_is_simple_use (cond_expr0
, loop_vinfo
, &dts
[0]);
8885 = vect_get_vec_def_for_operand (cond_expr1
,
8886 stmt
, comp_vectype
);
8887 vect_is_simple_use (cond_expr1
, loop_vinfo
, &dts
[1]);
8889 if (reduc_index
== 1)
8890 vec_then_clause
= reduc_def
;
8893 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8895 vect_is_simple_use (then_clause
, loop_vinfo
, &dts
[2]);
8897 if (reduc_index
== 2)
8898 vec_else_clause
= reduc_def
;
8901 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8903 vect_is_simple_use (else_clause
, loop_vinfo
, &dts
[3]);
8910 = vect_get_vec_def_for_stmt_copy (dts
[0],
8911 vec_oprnds0
.pop ());
8914 = vect_get_vec_def_for_stmt_copy (dts
[1],
8915 vec_oprnds1
.pop ());
8917 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8918 vec_oprnds2
.pop ());
8919 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8920 vec_oprnds3
.pop ());
8925 vec_oprnds0
.quick_push (vec_cond_lhs
);
8927 vec_oprnds1
.quick_push (vec_cond_rhs
);
8928 vec_oprnds2
.quick_push (vec_then_clause
);
8929 vec_oprnds3
.quick_push (vec_else_clause
);
8932 /* Arguments are ready. Create the new vector stmt. */
8933 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8935 vec_then_clause
= vec_oprnds2
[i
];
8936 vec_else_clause
= vec_oprnds3
[i
];
8939 vec_compare
= vec_cond_lhs
;
8942 vec_cond_rhs
= vec_oprnds1
[i
];
8943 if (bitop1
== NOP_EXPR
)
8944 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8945 vec_cond_lhs
, vec_cond_rhs
);
8948 new_temp
= make_ssa_name (vec_cmp_type
);
8949 if (bitop1
== BIT_NOT_EXPR
)
8950 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8954 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8956 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8957 if (bitop2
== NOP_EXPR
)
8958 vec_compare
= new_temp
;
8959 else if (bitop2
== BIT_NOT_EXPR
)
8961 /* Instead of doing ~x ? y : z do x ? z : y. */
8962 vec_compare
= new_temp
;
8963 std::swap (vec_then_clause
, vec_else_clause
);
8967 vec_compare
= make_ssa_name (vec_cmp_type
);
8969 = gimple_build_assign (vec_compare
, bitop2
,
8970 vec_cond_lhs
, new_temp
);
8971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8975 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
8977 if (!is_gimple_val (vec_compare
))
8979 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
8980 new_stmt
= gimple_build_assign (vec_compare_name
,
8982 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8983 vec_compare
= vec_compare_name
;
8985 gcc_assert (reduc_index
== 2);
8986 new_stmt
= gimple_build_call_internal
8987 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
8989 gimple_call_set_lhs (new_stmt
, scalar_dest
);
8990 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
8991 if (stmt
== gsi_stmt (*gsi
))
8992 vect_finish_replace_stmt (stmt
, new_stmt
);
8995 /* In this case we're moving the definition to later in the
8996 block. That doesn't matter because the only uses of the
8997 lhs are in phi statements. */
8998 gimple_stmt_iterator old_gsi
= gsi_for_stmt (stmt
);
8999 gsi_remove (&old_gsi
, true);
9000 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9005 new_temp
= make_ssa_name (vec_dest
);
9006 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
9007 vec_compare
, vec_then_clause
,
9009 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9012 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9019 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
9021 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
9023 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
9026 vec_oprnds0
.release ();
9027 vec_oprnds1
.release ();
9028 vec_oprnds2
.release ();
9029 vec_oprnds3
.release ();
9034 /* vectorizable_comparison.
9036 Check if STMT is comparison expression that can be vectorized.
9037 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9038 comparison, put it in VEC_STMT, and insert it at GSI.
9040 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9043 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9044 gimple
**vec_stmt
, tree reduc_def
,
9045 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9047 tree lhs
, rhs1
, rhs2
;
9048 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9049 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9050 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9051 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
9053 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9054 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
9058 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9059 stmt_vec_info prev_stmt_info
= NULL
;
9061 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9062 vec
<tree
> vec_oprnds0
= vNULL
;
9063 vec
<tree
> vec_oprnds1
= vNULL
;
9067 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9070 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
9073 mask_type
= vectype
;
9074 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9079 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9081 gcc_assert (ncopies
>= 1);
9082 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9083 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9087 if (STMT_VINFO_LIVE_P (stmt_info
))
9089 if (dump_enabled_p ())
9090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9091 "value used after loop.\n");
9095 if (!is_gimple_assign (stmt
))
9098 code
= gimple_assign_rhs_code (stmt
);
9100 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9103 rhs1
= gimple_assign_rhs1 (stmt
);
9104 rhs2
= gimple_assign_rhs2 (stmt
);
9106 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
9109 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
9112 if (vectype1
&& vectype2
9113 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9114 TYPE_VECTOR_SUBPARTS (vectype2
)))
9117 vectype
= vectype1
? vectype1
: vectype2
;
9119 /* Invariant comparison. */
9122 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9123 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9126 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9129 /* Can't compare mask and non-mask types. */
9130 if (vectype1
&& vectype2
9131 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
9134 /* Boolean values may have another representation in vectors
9135 and therefore we prefer bit operations over comparison for
9136 them (which also works for scalar masks). We store opcodes
9137 to use in bitop1 and bitop2. Statement is vectorized as
9138 BITOP2 (rhs1 BITOP1 rhs2) or
9139 rhs1 BITOP2 (BITOP1 rhs2)
9140 depending on bitop1 and bitop2 arity. */
9141 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9143 if (code
== GT_EXPR
)
9145 bitop1
= BIT_NOT_EXPR
;
9146 bitop2
= BIT_AND_EXPR
;
9148 else if (code
== GE_EXPR
)
9150 bitop1
= BIT_NOT_EXPR
;
9151 bitop2
= BIT_IOR_EXPR
;
9153 else if (code
== LT_EXPR
)
9155 bitop1
= BIT_NOT_EXPR
;
9156 bitop2
= BIT_AND_EXPR
;
9157 std::swap (rhs1
, rhs2
);
9158 std::swap (dts
[0], dts
[1]);
9160 else if (code
== LE_EXPR
)
9162 bitop1
= BIT_NOT_EXPR
;
9163 bitop2
= BIT_IOR_EXPR
;
9164 std::swap (rhs1
, rhs2
);
9165 std::swap (dts
[0], dts
[1]);
9169 bitop1
= BIT_XOR_EXPR
;
9170 if (code
== EQ_EXPR
)
9171 bitop2
= BIT_NOT_EXPR
;
9177 if (bitop1
== NOP_EXPR
)
9179 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
9184 machine_mode mode
= TYPE_MODE (vectype
);
9187 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9188 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9191 if (bitop2
!= NOP_EXPR
)
9193 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9194 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9199 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9200 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9201 dts
, ndts
, slp_node
, cost_vec
);
9208 vec_oprnds0
.create (1);
9209 vec_oprnds1
.create (1);
9213 lhs
= gimple_assign_lhs (stmt
);
9214 mask
= vect_create_destination_var (lhs
, mask_type
);
9216 /* Handle cmp expr. */
9217 for (j
= 0; j
< ncopies
; j
++)
9219 gassign
*new_stmt
= NULL
;
9224 auto_vec
<tree
, 2> ops
;
9225 auto_vec
<vec
<tree
>, 2> vec_defs
;
9227 ops
.safe_push (rhs1
);
9228 ops
.safe_push (rhs2
);
9229 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9230 vec_oprnds1
= vec_defs
.pop ();
9231 vec_oprnds0
= vec_defs
.pop ();
9235 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
9236 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
9241 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
9242 vec_oprnds0
.pop ());
9243 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
9244 vec_oprnds1
.pop ());
9249 vec_oprnds0
.quick_push (vec_rhs1
);
9250 vec_oprnds1
.quick_push (vec_rhs2
);
9253 /* Arguments are ready. Create the new vector stmt. */
9254 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9256 vec_rhs2
= vec_oprnds1
[i
];
9258 new_temp
= make_ssa_name (mask
);
9259 if (bitop1
== NOP_EXPR
)
9261 new_stmt
= gimple_build_assign (new_temp
, code
,
9262 vec_rhs1
, vec_rhs2
);
9263 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9267 if (bitop1
== BIT_NOT_EXPR
)
9268 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9270 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9272 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9273 if (bitop2
!= NOP_EXPR
)
9275 tree res
= make_ssa_name (mask
);
9276 if (bitop2
== BIT_NOT_EXPR
)
9277 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9279 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9281 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9285 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9292 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
9294 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
9296 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
9299 vec_oprnds0
.release ();
9300 vec_oprnds1
.release ();
9305 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9306 can handle all live statements in the node. Otherwise return true
9307 if STMT is not live or if vectorizable_live_operation can handle it.
9308 GSI and VEC_STMT are as for vectorizable_live_operation. */
9311 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9312 slp_tree slp_node
, gimple
**vec_stmt
,
9313 stmt_vector_for_cost
*cost_vec
)
9319 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
9321 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
9322 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9323 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
9324 vec_stmt
, cost_vec
))
9328 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
9329 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
,
9336 /* Make sure the statement is vectorizable. */
9339 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
9340 slp_instance node_instance
, stmt_vector_for_cost
*cost_vec
)
9342 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9343 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9344 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9346 gimple
*pattern_stmt
;
9347 gimple_seq pattern_def_seq
;
9349 if (dump_enabled_p ())
9351 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
9352 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9355 if (gimple_has_volatile_ops (stmt
))
9357 if (dump_enabled_p ())
9358 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9359 "not vectorized: stmt has volatile operands\n");
9364 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9366 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9368 gimple_stmt_iterator si
;
9370 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9372 gimple
*pattern_def_stmt
= gsi_stmt (si
);
9373 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
9374 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
9376 /* Analyze def stmt of STMT if it's a pattern stmt. */
9377 if (dump_enabled_p ())
9379 dump_printf_loc (MSG_NOTE
, vect_location
,
9380 "==> examining pattern def statement: ");
9381 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
9384 if (!vect_analyze_stmt (pattern_def_stmt
,
9385 need_to_vectorize
, node
, node_instance
,
9392 /* Skip stmts that do not need to be vectorized. In loops this is expected
9394 - the COND_EXPR which is the loop exit condition
9395 - any LABEL_EXPRs in the loop
9396 - computations that are used only for array indexing or loop control.
9397 In basic blocks we only analyze statements that are a part of some SLP
9398 instance, therefore, all the statements are relevant.
9400 Pattern statement needs to be analyzed instead of the original statement
9401 if the original statement is not relevant. Otherwise, we analyze both
9402 statements. In basic blocks we are called from some SLP instance
9403 traversal, don't analyze pattern stmts instead, the pattern stmts
9404 already will be part of SLP instance. */
9406 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
9407 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9408 && !STMT_VINFO_LIVE_P (stmt_info
))
9410 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9412 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9413 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9415 /* Analyze PATTERN_STMT instead of the original stmt. */
9416 stmt
= pattern_stmt
;
9417 stmt_info
= vinfo_for_stmt (pattern_stmt
);
9418 if (dump_enabled_p ())
9420 dump_printf_loc (MSG_NOTE
, vect_location
,
9421 "==> examining pattern statement: ");
9422 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9427 if (dump_enabled_p ())
9428 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9433 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9436 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9437 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9439 /* Analyze PATTERN_STMT too. */
9440 if (dump_enabled_p ())
9442 dump_printf_loc (MSG_NOTE
, vect_location
,
9443 "==> examining pattern statement: ");
9444 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9447 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
9448 node_instance
, cost_vec
))
9452 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9454 case vect_internal_def
:
9457 case vect_reduction_def
:
9458 case vect_nested_cycle
:
9459 gcc_assert (!bb_vinfo
9460 && (relevance
== vect_used_in_outer
9461 || relevance
== vect_used_in_outer_by_reduction
9462 || relevance
== vect_used_by_reduction
9463 || relevance
== vect_unused_in_scope
9464 || relevance
== vect_used_only_live
));
9467 case vect_induction_def
:
9468 gcc_assert (!bb_vinfo
);
9471 case vect_constant_def
:
9472 case vect_external_def
:
9473 case vect_unknown_def_type
:
9478 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9480 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
9481 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9482 || (is_gimple_call (stmt
)
9483 && gimple_call_lhs (stmt
) == NULL_TREE
));
9484 *need_to_vectorize
= true;
9487 if (PURE_SLP_STMT (stmt_info
) && !node
)
9489 dump_printf_loc (MSG_NOTE
, vect_location
,
9490 "handled only by SLP analysis\n");
9496 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9497 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9498 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9499 || vectorizable_conversion (stmt
, NULL
, NULL
, node
, cost_vec
)
9500 || vectorizable_shift (stmt
, NULL
, NULL
, node
, cost_vec
)
9501 || vectorizable_operation (stmt
, NULL
, NULL
, node
, cost_vec
)
9502 || vectorizable_assignment (stmt
, NULL
, NULL
, node
, cost_vec
)
9503 || vectorizable_load (stmt
, NULL
, NULL
, node
, node_instance
, cost_vec
)
9504 || vectorizable_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9505 || vectorizable_store (stmt
, NULL
, NULL
, node
, cost_vec
)
9506 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
,
9508 || vectorizable_induction (stmt
, NULL
, NULL
, node
, cost_vec
)
9509 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
, cost_vec
)
9510 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
, cost_vec
));
9514 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9515 || vectorizable_conversion (stmt
, NULL
, NULL
, node
, cost_vec
)
9516 || vectorizable_shift (stmt
, NULL
, NULL
, node
, cost_vec
)
9517 || vectorizable_operation (stmt
, NULL
, NULL
, node
, cost_vec
)
9518 || vectorizable_assignment (stmt
, NULL
, NULL
, node
, cost_vec
)
9519 || vectorizable_load (stmt
, NULL
, NULL
, node
, node_instance
,
9521 || vectorizable_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9522 || vectorizable_store (stmt
, NULL
, NULL
, node
, cost_vec
)
9523 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
,
9525 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
,
9531 if (dump_enabled_p ())
9533 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9534 "not vectorized: relevant stmt not ");
9535 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
9536 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9542 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9543 need extra handling, except for vectorizable reductions. */
9545 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9546 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
, cost_vec
))
9548 if (dump_enabled_p ())
9550 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9551 "not vectorized: live stmt not supported: ");
9552 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9562 /* Function vect_transform_stmt.
9564 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9567 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9568 bool *grouped_store
, slp_tree slp_node
,
9569 slp_instance slp_node_instance
)
9571 bool is_store
= false;
9572 gimple
*vec_stmt
= NULL
;
9573 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9576 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9577 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9579 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9580 && nested_in_vect_loop_p
9581 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9584 switch (STMT_VINFO_TYPE (stmt_info
))
9586 case type_demotion_vec_info_type
:
9587 case type_promotion_vec_info_type
:
9588 case type_conversion_vec_info_type
:
9589 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9593 case induc_vec_info_type
:
9594 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9598 case shift_vec_info_type
:
9599 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9603 case op_vec_info_type
:
9604 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9608 case assignment_vec_info_type
:
9609 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9613 case load_vec_info_type
:
9614 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
9615 slp_node_instance
, NULL
);
9619 case store_vec_info_type
:
9620 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9622 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9624 /* In case of interleaving, the whole chain is vectorized when the
9625 last store in the chain is reached. Store stmts before the last
9626 one are skipped, and there vec_stmt_info shouldn't be freed
9628 *grouped_store
= true;
9629 stmt_vec_info group_info
9630 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info
));
9631 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
9638 case condition_vec_info_type
:
9639 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
, NULL
);
9643 case comparison_vec_info_type
:
9644 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
, NULL
);
9648 case call_vec_info_type
:
9649 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9650 stmt
= gsi_stmt (*gsi
);
9653 case call_simd_clone_vec_info_type
:
9654 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9655 stmt
= gsi_stmt (*gsi
);
9658 case reduc_vec_info_type
:
9659 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
9660 slp_node_instance
, NULL
);
9665 if (!STMT_VINFO_LIVE_P (stmt_info
))
9667 if (dump_enabled_p ())
9668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9669 "stmt not supported.\n");
9674 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9675 This would break hybrid SLP vectorization. */
9677 gcc_assert (!vec_stmt
9678 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
9680 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9681 is being vectorized, but outside the immediately enclosing loop. */
9684 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9685 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9686 || STMT_VINFO_RELEVANT (stmt_info
) ==
9687 vect_used_in_outer_by_reduction
))
9689 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9690 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9691 imm_use_iterator imm_iter
;
9692 use_operand_p use_p
;
9696 if (dump_enabled_p ())
9697 dump_printf_loc (MSG_NOTE
, vect_location
,
9698 "Record the vdef for outer-loop vectorization.\n");
9700 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9701 (to be used when vectorizing outer-loop stmts that use the DEF of
9703 if (gimple_code (stmt
) == GIMPLE_PHI
)
9704 scalar_dest
= PHI_RESULT (stmt
);
9706 scalar_dest
= gimple_assign_lhs (stmt
);
9708 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9710 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9712 exit_phi
= USE_STMT (use_p
);
9713 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
9718 /* Handle stmts whose DEF is used outside the loop-nest that is
9719 being vectorized. */
9720 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9722 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
, NULL
);
9727 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9733 /* Remove a group of stores (for SLP or interleaving), free their
9737 vect_remove_stores (gimple
*first_stmt
)
9739 gimple
*next
= first_stmt
;
9741 gimple_stmt_iterator next_si
;
9745 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
9747 tmp
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
9748 if (is_pattern_stmt_p (stmt_info
))
9749 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
9750 /* Free the attached stmt_vec_info and remove the stmt. */
9751 next_si
= gsi_for_stmt (next
);
9752 unlink_stmt_vdef (next
);
9753 gsi_remove (&next_si
, true);
9754 release_defs (next
);
9755 free_stmt_vec_info (next
);
9761 /* Function new_stmt_vec_info.
9763 Create and initialize a new stmt_vec_info struct for STMT. */
9766 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
9769 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
9771 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
9772 STMT_VINFO_STMT (res
) = stmt
;
9774 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
9775 STMT_VINFO_LIVE_P (res
) = false;
9776 STMT_VINFO_VECTYPE (res
) = NULL
;
9777 STMT_VINFO_VEC_STMT (res
) = NULL
;
9778 STMT_VINFO_VECTORIZABLE (res
) = true;
9779 STMT_VINFO_IN_PATTERN_P (res
) = false;
9780 STMT_VINFO_RELATED_STMT (res
) = NULL
;
9781 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9782 STMT_VINFO_DATA_REF (res
) = NULL
;
9783 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9784 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9786 if (gimple_code (stmt
) == GIMPLE_PHI
9787 && is_loop_header_bb_p (gimple_bb (stmt
)))
9788 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9790 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9792 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9793 STMT_SLP_TYPE (res
) = loop_vect
;
9794 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9796 res
->first_element
= NULL
; /* GROUP_FIRST_ELEMENT */
9797 res
->next_element
= NULL
; /* GROUP_NEXT_ELEMENT */
9798 res
->size
= 0; /* GROUP_SIZE */
9799 res
->store_count
= 0; /* GROUP_STORE_COUNT */
9800 res
->gap
= 0; /* GROUP_GAP */
9801 res
->same_dr_stmt
= NULL
; /* GROUP_SAME_DR_STMT */
9803 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9804 res
->dr_aux
.misalignment
= DR_MISALIGNMENT_UNINITIALIZED
;
9810 /* Set the current stmt_vec_info vector to V. */
9813 set_stmt_vec_info_vec (vec
<stmt_vec_info
> *v
)
9815 stmt_vec_info_vec
= v
;
9818 /* Free the stmt_vec_info entries in V and release V. */
9821 free_stmt_vec_infos (vec
<stmt_vec_info
> *v
)
9825 FOR_EACH_VEC_ELT (*v
, i
, info
)
9827 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9828 if (v
== stmt_vec_info_vec
)
9829 stmt_vec_info_vec
= NULL
;
9834 /* Free stmt vectorization related info. */
9837 free_stmt_vec_info (gimple
*stmt
)
9839 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9844 /* Check if this statement has a related "pattern stmt"
9845 (introduced by the vectorizer during the pattern recognition
9846 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9848 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9850 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
))
9851 for (gimple_stmt_iterator si
= gsi_start (seq
);
9852 !gsi_end_p (si
); gsi_next (&si
))
9854 gimple
*seq_stmt
= gsi_stmt (si
);
9855 gimple_set_bb (seq_stmt
, NULL
);
9856 tree lhs
= gimple_get_lhs (seq_stmt
);
9857 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9858 release_ssa_name (lhs
);
9859 free_stmt_vec_info (seq_stmt
);
9861 stmt_vec_info patt_info
9862 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9865 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9866 gimple_set_bb (patt_stmt
, NULL
);
9867 tree lhs
= gimple_get_lhs (patt_stmt
);
9868 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9869 release_ssa_name (lhs
);
9870 free_stmt_vec_info (patt_stmt
);
9874 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9875 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9876 set_vinfo_for_stmt (stmt
, NULL
);
9881 /* Function get_vectype_for_scalar_type_and_size.
9883 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9887 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9889 tree orig_scalar_type
= scalar_type
;
9890 scalar_mode inner_mode
;
9891 machine_mode simd_mode
;
9895 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9896 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9899 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9901 /* For vector types of elements whose mode precision doesn't
9902 match their types precision we use a element type of mode
9903 precision. The vectorization routines will have to make sure
9904 they support the proper result truncation/extension.
9905 We also make sure to build vector types with INTEGER_TYPE
9906 component type only. */
9907 if (INTEGRAL_TYPE_P (scalar_type
)
9908 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9909 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9910 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9911 TYPE_UNSIGNED (scalar_type
));
9913 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9914 When the component mode passes the above test simply use a type
9915 corresponding to that mode. The theory is that any use that
9916 would cause problems with this will disable vectorization anyway. */
9917 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9918 && !INTEGRAL_TYPE_P (scalar_type
))
9919 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9921 /* We can't build a vector type of elements with alignment bigger than
9923 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9924 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9925 TYPE_UNSIGNED (scalar_type
));
9927 /* If we felt back to using the mode fail if there was
9928 no scalar type for it. */
9929 if (scalar_type
== NULL_TREE
)
9932 /* If no size was supplied use the mode the target prefers. Otherwise
9933 lookup a vector mode of the specified size. */
9934 if (known_eq (size
, 0U))
9935 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9936 else if (!multiple_p (size
, nbytes
, &nunits
)
9937 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9939 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9940 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9943 vectype
= build_vector_type (scalar_type
, nunits
);
9945 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9946 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9949 /* Re-attach the address-space qualifier if we canonicalized the scalar
9951 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9952 return build_qualified_type
9953 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9958 poly_uint64 current_vector_size
;
9960 /* Function get_vectype_for_scalar_type.
9962 Returns the vector type corresponding to SCALAR_TYPE as supported
9966 get_vectype_for_scalar_type (tree scalar_type
)
9969 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9970 current_vector_size
);
9972 && known_eq (current_vector_size
, 0U))
9973 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9977 /* Function get_mask_type_for_scalar_type.
9979 Returns the mask type corresponding to a result of comparison
9980 of vectors of specified SCALAR_TYPE as supported by target. */
9983 get_mask_type_for_scalar_type (tree scalar_type
)
9985 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9990 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9991 current_vector_size
);
9994 /* Function get_same_sized_vectype
9996 Returns a vector type corresponding to SCALAR_TYPE of size
9997 VECTOR_TYPE if supported by the target. */
10000 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
10002 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10003 return build_same_sized_truth_vector_type (vector_type
);
10005 return get_vectype_for_scalar_type_and_size
10006 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
10009 /* Function vect_is_simple_use.
10012 VINFO - the vect info of the loop or basic block that is being vectorized.
10013 OPERAND - operand in the loop or bb.
10015 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
10016 DT - the type of definition
10018 Returns whether a stmt with OPERAND can be vectorized.
10019 For loops, supportable operands are constants, loop invariants, and operands
10020 that are defined by the current iteration of the loop. Unsupportable
10021 operands are those that are defined by a previous iteration of the loop (as
10022 is the case in reduction/induction computations).
10023 For basic blocks, supportable operands are constants and bb invariants.
10024 For now, operands defined outside the basic block are not supported. */
10027 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10028 gimple
**def_stmt_out
)
10031 *def_stmt_out
= NULL
;
10032 *dt
= vect_unknown_def_type
;
10034 if (dump_enabled_p ())
10036 dump_printf_loc (MSG_NOTE
, vect_location
,
10037 "vect_is_simple_use: operand ");
10038 if (TREE_CODE (operand
) == SSA_NAME
10039 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
10040 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
10042 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
10045 if (CONSTANT_CLASS_P (operand
))
10046 *dt
= vect_constant_def
;
10047 else if (is_gimple_min_invariant (operand
))
10048 *dt
= vect_external_def
;
10049 else if (TREE_CODE (operand
) != SSA_NAME
)
10050 *dt
= vect_unknown_def_type
;
10051 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
10052 *dt
= vect_external_def
;
10055 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
10056 if (! vect_stmt_in_region_p (vinfo
, def_stmt
))
10057 *dt
= vect_external_def
;
10060 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (def_stmt
);
10061 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
10063 def_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
10064 stmt_vinfo
= vinfo_for_stmt (def_stmt
);
10066 switch (gimple_code (def_stmt
))
10069 case GIMPLE_ASSIGN
:
10071 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
10074 *dt
= vect_unknown_def_type
;
10079 *def_stmt_out
= def_stmt
;
10082 if (dump_enabled_p ())
10084 dump_printf (MSG_NOTE
, ", type of def: ");
10087 case vect_uninitialized_def
:
10088 dump_printf (MSG_NOTE
, "uninitialized\n");
10090 case vect_constant_def
:
10091 dump_printf (MSG_NOTE
, "constant\n");
10093 case vect_external_def
:
10094 dump_printf (MSG_NOTE
, "external\n");
10096 case vect_internal_def
:
10097 dump_printf (MSG_NOTE
, "internal\n");
10099 case vect_induction_def
:
10100 dump_printf (MSG_NOTE
, "induction\n");
10102 case vect_reduction_def
:
10103 dump_printf (MSG_NOTE
, "reduction\n");
10105 case vect_double_reduction_def
:
10106 dump_printf (MSG_NOTE
, "double reduction\n");
10108 case vect_nested_cycle
:
10109 dump_printf (MSG_NOTE
, "nested cycle\n");
10111 case vect_unknown_def_type
:
10112 dump_printf (MSG_NOTE
, "unknown\n");
10117 if (*dt
== vect_unknown_def_type
)
10119 if (dump_enabled_p ())
10120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10121 "Unsupported pattern.\n");
10128 /* Function vect_is_simple_use.
10130 Same as vect_is_simple_use but also determines the vector operand
10131 type of OPERAND and stores it to *VECTYPE. If the definition of
10132 OPERAND is vect_uninitialized_def, vect_constant_def or
10133 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10134 is responsible to compute the best suited vector type for the
10138 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10139 tree
*vectype
, gimple
**def_stmt_out
)
10142 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt
))
10146 *def_stmt_out
= def_stmt
;
10148 /* Now get a vector type if the def is internal, otherwise supply
10149 NULL_TREE and leave it up to the caller to figure out a proper
10150 type for the use stmt. */
10151 if (*dt
== vect_internal_def
10152 || *dt
== vect_induction_def
10153 || *dt
== vect_reduction_def
10154 || *dt
== vect_double_reduction_def
10155 || *dt
== vect_nested_cycle
)
10157 stmt_vec_info stmt_info
= vinfo_for_stmt (def_stmt
);
10158 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10159 gcc_assert (*vectype
!= NULL_TREE
);
10160 if (dump_enabled_p ())
10162 dump_printf_loc (MSG_NOTE
, vect_location
,
10163 "vect_is_simple_use: vectype ");
10164 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, *vectype
);
10165 dump_printf (MSG_NOTE
, "\n");
10168 else if (*dt
== vect_uninitialized_def
10169 || *dt
== vect_constant_def
10170 || *dt
== vect_external_def
)
10171 *vectype
= NULL_TREE
;
10173 gcc_unreachable ();
10179 /* Function supportable_widening_operation
10181 Check whether an operation represented by the code CODE is a
10182 widening operation that is supported by the target platform in
10183 vector form (i.e., when operating on arguments of type VECTYPE_IN
10184 producing a result of type VECTYPE_OUT).
10186 Widening operations we currently support are NOP (CONVERT), FLOAT,
10187 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10188 are supported by the target platform either directly (via vector
10189 tree-codes), or via target builtins.
10192 - CODE1 and CODE2 are codes of vector operations to be used when
10193 vectorizing the operation, if available.
10194 - MULTI_STEP_CVT determines the number of required intermediate steps in
10195 case of multi-step conversion (like char->short->int - in that case
10196 MULTI_STEP_CVT will be 1).
10197 - INTERM_TYPES contains the intermediate type required to perform the
10198 widening operation (short in the above example). */
10201 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
10202 tree vectype_out
, tree vectype_in
,
10203 enum tree_code
*code1
, enum tree_code
*code2
,
10204 int *multi_step_cvt
,
10205 vec
<tree
> *interm_types
)
10207 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
10208 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10209 struct loop
*vect_loop
= NULL
;
10210 machine_mode vec_mode
;
10211 enum insn_code icode1
, icode2
;
10212 optab optab1
, optab2
;
10213 tree vectype
= vectype_in
;
10214 tree wide_vectype
= vectype_out
;
10215 enum tree_code c1
, c2
;
10217 tree prev_type
, intermediate_type
;
10218 machine_mode intermediate_mode
, prev_mode
;
10219 optab optab3
, optab4
;
10221 *multi_step_cvt
= 0;
10223 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
10227 case WIDEN_MULT_EXPR
:
10228 /* The result of a vectorized widening operation usually requires
10229 two vectors (because the widened results do not fit into one vector).
10230 The generated vector results would normally be expected to be
10231 generated in the same order as in the original scalar computation,
10232 i.e. if 8 results are generated in each vector iteration, they are
10233 to be organized as follows:
10234 vect1: [res1,res2,res3,res4],
10235 vect2: [res5,res6,res7,res8].
10237 However, in the special case that the result of the widening
10238 operation is used in a reduction computation only, the order doesn't
10239 matter (because when vectorizing a reduction we change the order of
10240 the computation). Some targets can take advantage of this and
10241 generate more efficient code. For example, targets like Altivec,
10242 that support widen_mult using a sequence of {mult_even,mult_odd}
10243 generate the following vectors:
10244 vect1: [res1,res3,res5,res7],
10245 vect2: [res2,res4,res6,res8].
10247 When vectorizing outer-loops, we execute the inner-loop sequentially
10248 (each vectorized inner-loop iteration contributes to VF outer-loop
10249 iterations in parallel). We therefore don't allow to change the
10250 order of the computation in the inner-loop during outer-loop
10252 /* TODO: Another case in which order doesn't *really* matter is when we
10253 widen and then contract again, e.g. (short)((int)x * y >> 8).
10254 Normally, pack_trunc performs an even/odd permute, whereas the
10255 repack from an even/odd expansion would be an interleave, which
10256 would be significantly simpler for e.g. AVX2. */
10257 /* In any case, in order to avoid duplicating the code below, recurse
10258 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10259 are properly set up for the caller. If we fail, we'll continue with
10260 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10262 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
10263 && !nested_in_vect_loop_p (vect_loop
, stmt
)
10264 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
10265 stmt
, vectype_out
, vectype_in
,
10266 code1
, code2
, multi_step_cvt
,
10269 /* Elements in a vector with vect_used_by_reduction property cannot
10270 be reordered if the use chain with this property does not have the
10271 same operation. One such an example is s += a * b, where elements
10272 in a and b cannot be reordered. Here we check if the vector defined
10273 by STMT is only directly used in the reduction statement. */
10274 tree lhs
= gimple_assign_lhs (stmt
);
10275 use_operand_p dummy
;
10277 stmt_vec_info use_stmt_info
= NULL
;
10278 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
10279 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
10280 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
10283 c1
= VEC_WIDEN_MULT_LO_EXPR
;
10284 c2
= VEC_WIDEN_MULT_HI_EXPR
;
10287 case DOT_PROD_EXPR
:
10288 c1
= DOT_PROD_EXPR
;
10289 c2
= DOT_PROD_EXPR
;
10297 case VEC_WIDEN_MULT_EVEN_EXPR
:
10298 /* Support the recursion induced just above. */
10299 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
10300 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
10303 case WIDEN_LSHIFT_EXPR
:
10304 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
10305 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
10309 c1
= VEC_UNPACK_LO_EXPR
;
10310 c2
= VEC_UNPACK_HI_EXPR
;
10314 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
10315 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
10318 case FIX_TRUNC_EXPR
:
10319 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
10320 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
10324 gcc_unreachable ();
10327 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
10328 std::swap (c1
, c2
);
10330 if (code
== FIX_TRUNC_EXPR
)
10332 /* The signedness is determined from output operand. */
10333 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10334 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
10338 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10339 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
10342 if (!optab1
|| !optab2
)
10345 vec_mode
= TYPE_MODE (vectype
);
10346 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
10347 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
10353 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10354 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10355 /* For scalar masks we may have different boolean
10356 vector types having the same QImode. Thus we
10357 add additional check for elements number. */
10358 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10359 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
10360 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10362 /* Check if it's a multi-step conversion that can be done using intermediate
10365 prev_type
= vectype
;
10366 prev_mode
= vec_mode
;
10368 if (!CONVERT_EXPR_CODE_P (code
))
10371 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10372 intermediate steps in promotion sequence. We try
10373 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10375 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10376 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10378 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10379 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10381 intermediate_type
= vect_halve_mask_nunits (prev_type
);
10382 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10387 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
10388 TYPE_UNSIGNED (prev_type
));
10390 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10391 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
10393 if (!optab3
|| !optab4
10394 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
10395 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10396 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
10397 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
10398 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
10399 == CODE_FOR_nothing
)
10400 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
10401 == CODE_FOR_nothing
))
10404 interm_types
->quick_push (intermediate_type
);
10405 (*multi_step_cvt
)++;
10407 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10408 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10409 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10410 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
10411 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10413 prev_type
= intermediate_type
;
10414 prev_mode
= intermediate_mode
;
10417 interm_types
->release ();
10422 /* Function supportable_narrowing_operation
10424 Check whether an operation represented by the code CODE is a
10425 narrowing operation that is supported by the target platform in
10426 vector form (i.e., when operating on arguments of type VECTYPE_IN
10427 and producing a result of type VECTYPE_OUT).
10429 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10430 and FLOAT. This function checks if these operations are supported by
10431 the target platform directly via vector tree-codes.
10434 - CODE1 is the code of a vector operation to be used when
10435 vectorizing the operation, if available.
10436 - MULTI_STEP_CVT determines the number of required intermediate steps in
10437 case of multi-step conversion (like int->short->char - in that case
10438 MULTI_STEP_CVT will be 1).
10439 - INTERM_TYPES contains the intermediate type required to perform the
10440 narrowing operation (short in the above example). */
10443 supportable_narrowing_operation (enum tree_code code
,
10444 tree vectype_out
, tree vectype_in
,
10445 enum tree_code
*code1
, int *multi_step_cvt
,
10446 vec
<tree
> *interm_types
)
10448 machine_mode vec_mode
;
10449 enum insn_code icode1
;
10450 optab optab1
, interm_optab
;
10451 tree vectype
= vectype_in
;
10452 tree narrow_vectype
= vectype_out
;
10454 tree intermediate_type
, prev_type
;
10455 machine_mode intermediate_mode
, prev_mode
;
10459 *multi_step_cvt
= 0;
10463 c1
= VEC_PACK_TRUNC_EXPR
;
10466 case FIX_TRUNC_EXPR
:
10467 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
10471 c1
= VEC_PACK_FLOAT_EXPR
;
10475 gcc_unreachable ();
10478 if (code
== FIX_TRUNC_EXPR
)
10479 /* The signedness is determined from output operand. */
10480 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10482 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10487 vec_mode
= TYPE_MODE (vectype
);
10488 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
10493 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10494 /* For scalar masks we may have different boolean
10495 vector types having the same QImode. Thus we
10496 add additional check for elements number. */
10497 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10498 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
10499 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10501 if (code
== FLOAT_EXPR
)
10504 /* Check if it's a multi-step conversion that can be done using intermediate
10506 prev_mode
= vec_mode
;
10507 prev_type
= vectype
;
10508 if (code
== FIX_TRUNC_EXPR
)
10509 uns
= TYPE_UNSIGNED (vectype_out
);
10511 uns
= TYPE_UNSIGNED (vectype
);
10513 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10514 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10515 costly than signed. */
10516 if (code
== FIX_TRUNC_EXPR
&& uns
)
10518 enum insn_code icode2
;
10521 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
10523 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10524 if (interm_optab
!= unknown_optab
10525 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
10526 && insn_data
[icode1
].operand
[0].mode
10527 == insn_data
[icode2
].operand
[0].mode
)
10530 optab1
= interm_optab
;
10535 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10536 intermediate steps in promotion sequence. We try
10537 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10538 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10539 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10541 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10542 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10544 intermediate_type
= vect_double_mask_nunits (prev_type
);
10545 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10550 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
10552 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
10555 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
10556 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10557 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
10558 == CODE_FOR_nothing
))
10561 interm_types
->quick_push (intermediate_type
);
10562 (*multi_step_cvt
)++;
10564 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10565 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10566 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
10567 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10569 prev_mode
= intermediate_mode
;
10570 prev_type
= intermediate_type
;
10571 optab1
= interm_optab
;
10574 interm_types
->release ();
10578 /* Generate and return a statement that sets vector mask MASK such that
10579 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10582 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10584 tree cmp_type
= TREE_TYPE (start_index
);
10585 tree mask_type
= TREE_TYPE (mask
);
10586 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10587 cmp_type
, mask_type
,
10588 OPTIMIZE_FOR_SPEED
));
10589 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10590 start_index
, end_index
,
10591 build_zero_cst (mask_type
));
10592 gimple_call_set_lhs (call
, mask
);
10596 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10597 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10600 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10603 tree tmp
= make_ssa_name (mask_type
);
10604 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10605 gimple_seq_add_stmt (seq
, call
);
10606 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
10609 /* Try to compute the vector types required to vectorize STMT_INFO,
10610 returning true on success and false if vectorization isn't possible.
10614 - Set *STMT_VECTYPE_OUT to:
10615 - NULL_TREE if the statement doesn't need to be vectorized;
10616 - boolean_type_node if the statement is a boolean operation whose
10617 vector type can only be determined once all the other vector types
10619 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10621 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10622 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10623 statement does not help to determine the overall number of units. */
10626 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
10627 tree
*stmt_vectype_out
,
10628 tree
*nunits_vectype_out
)
10630 gimple
*stmt
= stmt_info
->stmt
;
10632 *stmt_vectype_out
= NULL_TREE
;
10633 *nunits_vectype_out
= NULL_TREE
;
10635 if (gimple_get_lhs (stmt
) == NULL_TREE
10636 /* MASK_STORE has no lhs, but is ok. */
10637 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10639 if (is_a
<gcall
*> (stmt
))
10641 /* Ignore calls with no lhs. These must be calls to
10642 #pragma omp simd functions, and what vectorization factor
10643 it really needs can't be determined until
10644 vectorizable_simd_clone_call. */
10645 if (dump_enabled_p ())
10646 dump_printf_loc (MSG_NOTE
, vect_location
,
10647 "defer to SIMD clone analysis.\n");
10651 if (dump_enabled_p ())
10653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10654 "not vectorized: irregular stmt.");
10655 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
10660 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
10662 if (dump_enabled_p ())
10664 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10665 "not vectorized: vector stmt in loop:");
10666 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
10672 tree scalar_type
= NULL_TREE
;
10673 if (STMT_VINFO_VECTYPE (stmt_info
))
10674 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10677 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
10678 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10679 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
10681 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
10683 /* Pure bool ops don't participate in number-of-units computation.
10684 For comparisons use the types being compared. */
10685 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
10686 && is_gimple_assign (stmt
)
10687 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
10689 *stmt_vectype_out
= boolean_type_node
;
10691 tree rhs1
= gimple_assign_rhs1 (stmt
);
10692 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10693 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10694 scalar_type
= TREE_TYPE (rhs1
);
10697 if (dump_enabled_p ())
10698 dump_printf_loc (MSG_NOTE
, vect_location
,
10699 "pure bool operation.\n");
10704 if (dump_enabled_p ())
10706 dump_printf_loc (MSG_NOTE
, vect_location
,
10707 "get vectype for scalar type: ");
10708 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
10709 dump_printf (MSG_NOTE
, "\n");
10711 vectype
= get_vectype_for_scalar_type (scalar_type
);
10714 if (dump_enabled_p ())
10716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10717 "not vectorized: unsupported data-type ");
10718 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
10720 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
10725 if (!*stmt_vectype_out
)
10726 *stmt_vectype_out
= vectype
;
10728 if (dump_enabled_p ())
10730 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
10731 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
10732 dump_printf (MSG_NOTE
, "\n");
10736 /* Don't try to compute scalar types if the stmt produces a boolean
10737 vector; use the existing vector type instead. */
10738 tree nunits_vectype
;
10739 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10740 nunits_vectype
= vectype
;
10743 /* The number of units is set according to the smallest scalar
10744 type (or the largest vector size, but we only support one
10745 vector size per vectorization). */
10746 if (*stmt_vectype_out
!= boolean_type_node
)
10748 HOST_WIDE_INT dummy
;
10749 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
10751 if (dump_enabled_p ())
10753 dump_printf_loc (MSG_NOTE
, vect_location
,
10754 "get vectype for scalar type: ");
10755 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
10756 dump_printf (MSG_NOTE
, "\n");
10758 nunits_vectype
= get_vectype_for_scalar_type (scalar_type
);
10760 if (!nunits_vectype
)
10762 if (dump_enabled_p ())
10764 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10765 "not vectorized: unsupported data-type ");
10766 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
10767 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
10772 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
10773 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
10775 if (dump_enabled_p ())
10777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10778 "not vectorized: different sized vector "
10779 "types in statement, ");
10780 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, vectype
);
10781 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
10782 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, nunits_vectype
);
10783 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
10788 if (dump_enabled_p ())
10790 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
10791 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, nunits_vectype
);
10792 dump_printf (MSG_NOTE
, "\n");
10794 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
10795 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
10796 dump_printf (MSG_NOTE
, "\n");
10799 *nunits_vectype_out
= nunits_vectype
;
10803 /* Try to determine the correct vector type for STMT_INFO, which is a
10804 statement that produces a scalar boolean result. Return the vector
10805 type on success, otherwise return NULL_TREE. */
10808 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
10810 gimple
*stmt
= stmt_info
->stmt
;
10811 tree mask_type
= NULL
;
10812 tree vectype
, scalar_type
;
10814 if (is_gimple_assign (stmt
)
10815 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10816 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
10818 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
10819 mask_type
= get_mask_type_for_scalar_type (scalar_type
);
10823 if (dump_enabled_p ())
10824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10825 "not vectorized: unsupported mask\n");
10833 enum vect_def_type dt
;
10835 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
10837 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
10839 if (dump_enabled_p ())
10841 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10842 "not vectorized: can't compute mask type "
10843 "for statement, ");
10844 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
,
10850 /* No vectype probably means external definition.
10851 Allow it in case there is another operand which
10852 allows to determine mask type. */
10857 mask_type
= vectype
;
10858 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
10859 TYPE_VECTOR_SUBPARTS (vectype
)))
10861 if (dump_enabled_p ())
10863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10864 "not vectorized: different sized masks "
10865 "types in statement, ");
10866 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
10868 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
10869 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
10871 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
10875 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
10876 != VECTOR_BOOLEAN_TYPE_P (vectype
))
10878 if (dump_enabled_p ())
10880 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10881 "not vectorized: mixed mask and "
10882 "nonmask vector types in statement, ");
10883 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
10885 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
10886 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
10888 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
10894 /* We may compare boolean value loaded as vector of integers.
10895 Fix mask_type in such case. */
10897 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
10898 && gimple_code (stmt
) == GIMPLE_ASSIGN
10899 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
10900 mask_type
= build_same_sized_truth_vector_type (mask_type
);
10903 /* No mask_type should mean loop invariant predicate.
10904 This is probably a subject for optimization in if-conversion. */
10905 if (!mask_type
&& dump_enabled_p ())
10907 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10908 "not vectorized: can't compute mask type "
10909 "for statement, ");
10910 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);