/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
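
/* Illustrative sketch (not part of the original file): a typical caller
   that wants to account for NCOPIES misaligned vector stores in the loop
   body would do something like

     inside_cost += record_stmt_cost (body_cost_vec, ncopies,
				      unaligned_store, stmt_info,
				      DR_MISALIGNMENT (dr), vect_body);

   where BODY_COST_VEC, NCOPIES, STMT_INFO and DR are assumed to come from
   the surrounding vectorization context (compare vect_get_store_cost
   below).  */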
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
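
/* Illustrative sketch (not part of the original file): in a loop such as

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + c[i];   <-- alters memory (has a vdef)
	 s = s + b[i];         <-- s is used after the loop, so the stmt
				   is live
       }

   vect_stmt_relevant_p marks the store as vect_used_in_scope, and marks the
   summation as live; if nothing else in the loop needs its result it ends up
   as vect_used_only_live.  */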
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- array_ref = var
     -2- var = array_ref
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}
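
/* Illustrative sketch (not part of the original file): for

     for (i = 0; i < n; i++)
       x[i] = y[i];

   the use of 'i' inside the loop body serves only to index x[] and y[]
   (case 1 above), so process_use leaves the relevance of i's definition
   unchanged; the data references themselves are what get vectorized.  */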
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");
	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");
	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}
/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}
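
/* Worked example (illustrative, not part of the original file): for an SLP
   node with group_size == 4, const_nunits == 4 and an external operand that
   is the same SSA name in all four scalar stmts, ELT stays non-NULL and one
   scalar_to_vec (splat) is costed per vector; if the four operands differ,
   a vec_construct is costed instead.  */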
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
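
/* Worked example (illustrative, not part of the original file): with
   PWR == 1 (a two-step conversion) the loop above costs
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote operations
   for a promotion, and vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3 for a
   demotion, reflecting that each extra step doubles the instruction
   count.  */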
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
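
/* Worked example (illustrative, not part of the original file): a grouped
   store implemented by permute-and-store with group_size == 4 and
   ncopies == 1 gets nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm operations
   charged to the loop body, in addition to the vector stores themselves,
   which are costed by vect_get_store_cost below.  */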
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");
	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");
	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
			VS1.1:  vx.1 = memref1      VS1.2
			VS1.2:  vx.2 = memref2      VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT did.  */

void
vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_replace (&gsi, vec_stmt, false);

  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (stmt, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (tree, tree, tree, gimple *,
				  gimple_stmt_iterator *);
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                          vec_load_store_type vls_type, int group_size,
                          vect_memory_access_type memory_access_type,
                          gather_scatter_info *gs_info)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      tree offset_type = TREE_TYPE (gs_info->offset);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                   gs_info->memory_type,
                                                   TYPE_SIGN (offset_type),
                                                   gs_info->scale))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't use a fully-masked loop because the"
                             " target doesn't have an appropriate masked"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
          return;
        }
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because an access"
                         " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
        (GET_MODE_NUNITS (vecmode),
         GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't use a fully-masked loop because the target"
                         " doesn't have the appropriate masked load or"
                         " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}
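
/* Illustrative example (assumed numbers, not from the original source):
   for a contiguous group with GROUP_SIZE == 3, VF == 8 and NUNITS == 4,
   can_div_away_from_zero_p (24, 4, &nvectors) sets NVECTORS to 6, so six
   loop masks of VECTYPE's mask type are recorded for the group.  */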
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
                         gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
                                          vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
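
/* Illustrative sketch of the gimple emitted above when LOOP_MASK is
   nonnull (SSA names invented for the example):

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   vec_mask_and_1 is then passed to the masked load or store in place
   of the original vectorized condition.  */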
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT by truncating the current offset to a smaller
   width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
                                     bool masked_p,
                                     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
        continue;

      /* See whether we can calculate (COUNT - 1) * STEP / SCALE
         in OFFSET_BITS bits.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
        continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      if (wi::min_precision (range, sign) > element_bits)
        {
          overflow = wi::OVF_UNKNOWN;
          continue;
        }

      /* See whether the target supports the operation.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
                                     memory_type, element_bits, sign, scale,
                                     &gs_info->ifn, &gs_info->element_type))
        continue;

      tree offset_type = build_nonstandard_integer_type (element_bits,
                                                         sign == UNSIGNED);

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
         but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->offset_vectype = NULL_TREE;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "truncating gather/scatter offset to %d bits"
                     " might change its value.\n", element_bits);
  return false;
}
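
/* Worked example (assumed values, for illustration only): with
   DR_STEP == 12, 32-bit elements and at most 200 scalar iterations,
   COUNT == 200.  For SCALE == 4, FACTOR == 12 / 4 == 3 and the largest
   offset value is 200 * 3 == 600, which needs far fewer than 32 bits,
   so the truncated offset is safe and the gather/scatter form can be
   used if the target supports it.  */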
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
                                    bool masked_p,
                                    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
                                                masked_p, gs_info);

  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int offset_bits = TYPE_PRECISION (offset_type);

  /* Enforced by vect_check_gather_scatter.  */
  gcc_assert (element_bits >= offset_bits);

  /* If the elements are wider than the offset, convert the offset to the
     same width, without changing its sign.  */
  if (element_bits > offset_bits)
    {
      bool unsigned_p = TYPE_UNSIGNED (offset_type);
      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
      gs_info->offset = fold_convert (offset_type, gs_info->offset);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "using gather/scatter for strided/grouped access,"
                     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr)->step,
                               size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
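
/* Illustrative example (not in the original source): for a 4-element
   vector the loop above pushes 3, 2, 1 and the stepped encoding extends
   this to { 3, 2, 1, 0 }, i.e. the constant permutation that reverses
   a V4SI vector.  */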
/* STMT is either a masked or unconditional store.  Return the value
   being stored.  */

static tree
vect_get_store_rhs (gimple *stmt)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (stmt, index);
    }
  gcc_unreachable ();
}
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is part of a grouped load
   or store.

   For stores, the statements in the group are all consecutive
   and there is no gap at the end.  For loads, the statements in the
   group might not be consecutive; there can be gaps between statements
   as well as at the end.  */

static bool
get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
                           bool masked_p, vec_load_store_type vls_type,
                           vect_memory_access_type *memory_access_type,
                           gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
  data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
  bool single_element_p = (stmt == first_stmt
                           && !DR_GROUP_NEXT_ELEMENT (stmt_info));
  unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* True if the vectorized statements would access beyond the last
     statement in the group.  */
  bool overrun_p = false;

  /* True if we can cope with such overrun by peeling for gaps, so that
     there is at least one final scalar iteration after the vector loop.  */
  bool can_overrun_p = (!masked_p
                        && vls_type == VLS_LOAD
                        && loop_vinfo
                        && !loop->inner);

  /* There can only be a gap at the end of the group if the stride is
     known at compile time.  */
  gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);

  /* Stores can't yet have gaps.  */
  gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);

  if (slp)
    {
      if (STMT_VINFO_STRIDED_P (stmt_info))
        {
          /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
             separated by the stride, until we have a complete vector.
             Fall back to scalar accesses if that isn't possible.  */
          if (multiple_p (nunits, group_size))
            *memory_access_type = VMAT_STRIDED_SLP;
          else
            *memory_access_type = VMAT_ELEMENTWISE;
        }
      else
        {
          overrun_p = loop_vinfo && gap != 0;
          if (overrun_p && vls_type != VLS_LOAD)
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Grouped store with gaps requires"
                               " non-consecutive accesses\n");
              return false;
            }
          /* An overrun is fine if the trailing elements are smaller
             than the alignment boundary B.  Every vector access will
             be a multiple of B and so we are guaranteed to access a
             non-gap element in the same B-sized block.  */
          if (overrun_p
              && gap < (vect_known_alignment_in_bytes (first_dr)
                        / vect_get_scalar_dr_size (first_dr)))
            overrun_p = false;
          if (overrun_p && !can_overrun_p)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }
          *memory_access_type = VMAT_CONTIGUOUS;
        }
    }
  else
    {
      /* We can always handle this case using elementwise accesses,
         but see if something more efficient is available.  */
      *memory_access_type = VMAT_ELEMENTWISE;

      /* If there is a gap at the end of the group then these optimizations
         would access excess elements in the last iteration.  */
      bool would_overrun_p = (gap != 0);
      /* An overrun is fine if the trailing elements are smaller than the
         alignment boundary B.  Every vector access will be a multiple of B
         and so we are guaranteed to access a non-gap element in the
         same B-sized block.  */
      if (would_overrun_p
          && !masked_p
          && gap < (vect_known_alignment_in_bytes (first_dr)
                    / vect_get_scalar_dr_size (first_dr)))
        would_overrun_p = false;

      if (!STMT_VINFO_STRIDED_P (stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (stmt) > 0)
        {
          /* First cope with the degenerate case of a single-element
             vector.  */
          if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
            *memory_access_type = VMAT_CONTIGUOUS;

          /* Otherwise try using LOAD/STORE_LANES.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_load_lanes_supported (vectype, group_size, masked_p)
                  : vect_store_lanes_supported (vectype, group_size,
                                                masked_p)))
            {
              *memory_access_type = VMAT_LOAD_STORE_LANES;
              overrun_p = would_overrun_p;
            }

          /* If that fails, try using permuting loads.  */
          if (*memory_access_type == VMAT_ELEMENTWISE
              && (vls_type == VLS_LOAD
                  ? vect_grouped_load_supported (vectype, single_element_p,
                                                 group_size)
                  : vect_grouped_store_supported (vectype, group_size)))
            {
              *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
              overrun_p = would_overrun_p;
            }
        }

      /* As a last resort, trying using a gather load or scatter store.

         ??? Although the code can handle all group sizes correctly,
         it probably isn't a win to use separate strided accesses based
         on nearby locations.  Or, even if it's a win over scalar code,
         it might not be a win over vectorizing at a lower VF, if that
         allows us to use contiguous accesses.  */
      if (*memory_access_type == VMAT_ELEMENTWISE
          && single_element_p
          && loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
    }

  if (vls_type != VLS_LOAD && first_stmt == stmt)
    {
      /* STMT is the leader of the group.  Check the operands of all the
         stmts of the group.  */
      gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
      while (next_stmt)
        {
          tree op = vect_get_store_rhs (next_stmt);
          enum vect_def_type dt;
          if (!vect_is_simple_use (op, vinfo, &dt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "use not simple.\n");
              return false;
            }
          next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
    }

  if (overrun_p)
    {
      gcc_assert (can_overrun_p);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Data access with gaps requires scalar "
                         "epilogue loop\n");
      LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
    }

  return true;
}
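
/* Illustrative example (assumed numbers, not from the original source):
   a load group of size 3 with a gap of 1 at the end, 16-byte known
   alignment and 4-byte elements has gap < 16 / 4, so the overrun stays
   within the same 16-byte block and a contiguous access remains usable
   without peeling for gaps.  */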
/* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT is a load or store that
   accesses consecutive elements with a negative step.  */

static vect_memory_access_type
get_negative_load_store_type (gimple *stmt, tree vectype,
                              vec_load_store_type vls_type,
                              unsigned int ncopies)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  dr_alignment_support alignment_support_scheme;

  if (ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types with negative step.\n");
      return VMAT_ELEMENTWISE;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  if (alignment_support_scheme != dr_aligned
      && alignment_support_scheme != dr_unaligned_supported)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step but alignment required.\n");
      return VMAT_ELEMENTWISE;
    }

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "negative step with invariant source;"
                         " no permute needed.\n");
      return VMAT_CONTIGUOUS_DOWN;
    }

  if (!perm_mask_for_reverse (vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step and reversing not supported.\n");
      return VMAT_ELEMENTWISE;
    }

  return VMAT_CONTIGUOUS_REVERSE;
}
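
/* Illustrative example (not in the original source): a store of a
   loop-invariant value with DR_STEP == -4 needs no element reversal,
   since every lane stores the same value, so VMAT_CONTIGUOUS_DOWN is
   returned; a varying source instead requires the reverse permutation
   checked via perm_mask_for_reverse above.  */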
/* Analyze load or store statement STMT of type VLS_TYPE.  Return true
   if there is a memory access type that the vectorized form can use,
   storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
   or scatters, fill in GS_INFO accordingly.

   SLP says whether we're performing SLP rather than loop vectorization.
   MASKED_P is true if the statement is conditional on a vectorized mask.
   VECTYPE is the vector type that the vectorized statements will use.
   NCOPIES is the number of vector statements that will be needed.  */

static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
                     vec_load_store_type vls_type, unsigned int ncopies,
                     vect_memory_access_type *memory_access_type,
                     gather_scatter_info *gs_info)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      *memory_access_type = VMAT_GATHER_SCATTER;
      if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
        gcc_unreachable ();
      else if (!vect_is_simple_use (gs_info->offset, vinfo,
                                    &gs_info->offset_dt,
                                    &gs_info->offset_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "%s index use not simple.\n",
                             vls_type == VLS_LOAD ? "gather" : "scatter");
          return false;
        }
    }
  else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
                                      memory_access_type, gs_info))
        return false;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gcc_assert (!slp);
      if (loop_vinfo
          && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
                                                 masked_p, gs_info))
        *memory_access_type = VMAT_GATHER_SCATTER;
      else
        *memory_access_type = VMAT_ELEMENTWISE;
    }
  else
    {
      int cmp = compare_step_with_zero (stmt);
      if (cmp < 0)
        *memory_access_type = get_negative_load_store_type
          (stmt, vectype, vls_type, ncopies);
      else if (cmp == 0)
        {
          gcc_assert (vls_type == VLS_LOAD);
          *memory_access_type = VMAT_INVARIANT;
        }
      else
        *memory_access_type = VMAT_CONTIGUOUS;
    }

  if ((*memory_access_type == VMAT_ELEMENTWISE
       || *memory_access_type == VMAT_STRIDED_SLP)
      && !nunits.is_constant ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Not using elementwise accesses due to variable "
                         "vectorization factor.\n");
      return false;
    }

  /* FIXME: At the moment the cost model seems to underestimate the
     cost of using elementwise accesses.  This check preserves the
     traditional behavior until that can be fixed.  */
  if (*memory_access_type == VMAT_ELEMENTWISE
      && !STMT_VINFO_STRIDED_P (stmt_info)
      && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
           && !DR_GROUP_NEXT_ELEMENT (stmt_info)
           && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not falling back to elementwise accesses\n");
      return false;
    }
  return true;
}
/* Return true if boolean argument MASK is suitable for vectorizing
   conditional load or store STMT.  When returning true, store the type
   of the definition in *MASK_DT_OUT and the type of the vectorized mask
   in *MASK_VECTYPE_OUT.  */

static bool
vect_check_load_store_mask (gimple *stmt, tree mask,
                            vect_def_type *mask_dt_out,
                            tree *mask_vectype_out)
{
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not a boolean.\n");
      return false;
    }

  if (TREE_CODE (mask) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask argument is not an SSA name.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_def_type mask_dt;
  tree mask_vectype;
  if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "mask use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "could not find an appropriate vector mask type.\n");
      return false;
    }

  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
                TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vector mask type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
          dump_printf (MSG_MISSED_OPTIMIZATION,
                       " does not match vector data type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
          dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
        }
      return false;
    }

  *mask_dt_out = mask_dt;
  *mask_vectype_out = mask_vectype;
  return true;
}
/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
                      tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot encode constant as a byte sequence.\n");
      return false;
    }

  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}
/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (gimple *stmt, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT as a gather load.  */

static tree
vect_build_zero_merge_argument (gimple *stmt, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
        tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt, merge, vectype, NULL);
}
/* Build a gather load call while vectorizing STMT.  Insert new instructions
   before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
   operation.  If the load is conditional, MASK is the unvectorized
   condition and MASK_DT is its definition type, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, gather_scatter_info *gs_info,
                              tree mask, vect_def_type mask_dt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  gcc_checking_assert (types_compatible_p (srctype, rettype)
                       && (!mask || types_compatible_p (srctype, masktype)));

  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
         fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
        sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
                                              indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
         fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
        sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask)
        {
          for (int i = 0; i < count; ++i)
            sel[i] = i | (count / 2);
          indices.new_vector (sel, 2, count);
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
        }
    }
  else
    gcc_unreachable ();

  tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
                                               vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt, rettype);
      mask_op = vect_build_all_ones_mask (stmt, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      gimple *new_stmt;
      if (modifier == WIDEN && (j & 1))
        op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                   perm_mask, stmt, gsi);
      else if (j == 0)
        op = vec_oprnd0
          = vect_get_vec_def_for_operand (gs_info->offset, stmt);
      else
        op = vec_oprnd0
          = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
        {
          gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
                                TYPE_VECTOR_SUBPARTS (idxtype)));
          var = vect_get_new_ssa_name (idxtype, vect_simple_var);
          op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
          new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          op = var;
        }

      if (mask)
        {
          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert
                    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
                               TYPE_VECTOR_SUBPARTS (masktype)));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
                                                  mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }
          src_op = mask_op;
        }

      new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
                                    mask_op, scale);

      if (!useless_type_conversion_p (vectype, rettype))
        {
          gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                                TYPE_VECTOR_SUBPARTS (rettype)));
          op = vect_get_new_ssa_name (rettype, vect_simple_var);
          gimple_call_set_lhs (new_stmt, op);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          var = make_ssa_name (vec_dest);
          op = build1 (VIEW_CONVERT_EXPR, vectype, op);
          new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
        }
      else
        {
          var = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, var);
        }

      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (modifier == NARROW)
        {
          if ((j & 1) == 0)
            {
              prev_res = var;
              continue;
            }
          var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
          new_stmt = SSA_NAME_DEF_STMT (var);
        }

      if (prev_stmt_info == NULL)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
}
/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT.  STMT
   is the statement described by GS_INFO and LOOP is the containing loop.  */

static void
vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
                             gather_scatter_info *gs_info,
                             tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
                                              offset_vectype);
}
/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
                                 gather_scatter_info *gs_info,
                                 tree *dataref_bump, tree *vec_offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
                          fold_convert (sizetype, DR_STEP (dr)),
                          size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  offset_type = TREE_TYPE (offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
                          ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
                              build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
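
/* Worked example (assumed values): for DR_STEP == 16, SCALE == 4 and a
   four-element vector, X == 16 / 4 == 4, so *VEC_OFFSET becomes the
   series { 0, 4, 8, 12 } and *DATAREF_BUMP is 16 * 4 == 64 bytes per
   copy of the vectorized statement.  */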
/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
                             vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
                    gimple **vec_stmt, slp_tree slp_node,
                    tree vectype_in, enum vect_def_type *dt,
                    stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;
  unsigned HOST_WIDE_INT nunits, num_bytes;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
    return false;

  unsigned word_bytes = num_bytes / nunits;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      if (! slp_node)
        {
          record_stmt_cost (cost_vec,
                            1, vector_stmt, stmt_info, 0, vect_prologue);
          record_stmt_cost (cost_vec,
                            ncopies, vec_perm, stmt_info, 0, vect_body);
        }
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          tree tem = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       char_vectype, vop));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tree tem2 = make_ssa_name (char_vectype);
          new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
                                          tem, tem, bswap_vconst);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          tem = make_ssa_name (vectype);
          new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
                                                       vectype, tem2));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
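
/* Illustrative example (not in the original source): for
   __builtin_bswap32 on a V4SI input viewed as V16QI, WORD_BYTES == 4
   and the permutation built above begins { 3, 2, 1, 0, 7, 6, 5, 4, ... },
   so the single VEC_PERM_EXPR reverses the bytes within each 32-bit
   word.  */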
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[4]
    = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
        vect_unknown_def_type };
  int ndts = ARRAY_SIZE (dt);
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<tree, 8> vargs;
  auto_vec<tree, 8> orig_vargs;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?   */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
          || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 4)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  combined_fn cfn = gimple_call_combined_fn (stmt);
  if (cfn == CFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  int mask_opno = -1;
  if (internal_fn_p (cfn))
    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      /* Skip the mask argument to an internal function.  This operand
         has been converted via a pattern if necessary.  */
      if ((int) i == mask_opno)
        continue;

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else if (callee)
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else if (modifier == NONE
               && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
                   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
                   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
        return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
                                   vectype_in, dt, cost_vec);
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        record_stmt_cost (cost_vec, ncopies / 2,
                          vec_promote_demote, stmt_info, 0, vect_body);

      if (loop_vinfo && mask_opno >= 0)
        {
          unsigned int nvectors = (slp_node
                                   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
                                   : ncopies);
          vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
        }
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      vargs.safe_grow (nargs);
      orig_vargs.safe_grow (nargs);
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs[i] = gimple_call_arg (stmt, i);
              vect_get_slp_defs (vargs, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  if (modifier == NARROW)
                    {
                      /* We don't define any narrowing conditional functions
                         at present.  */
                      gcc_assert (mask_opno < 0);
                      tree half_res = make_ssa_name (vectype_in);
                      gcall *call
                        = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (call, half_res);
                      gimple_call_set_nothrow (call, true);
                      new_stmt = call;
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      if (mask_opno >= 0 && masked_loop_p)
                        {
                          unsigned int vec_num = vec_oprnds0.length ();
                          /* Always true for SLP.  */
                          gcc_assert (ncopies == 1);
                          tree mask = vect_get_loop_mask (gsi, masks, vec_num,
                                                          vectype_out, i);
                          vargs[mask_opno] = prepare_load_store_mask
                            (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
                        }

                      gcall *call;
                      if (ifn != IFN_LAST)
                        call = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        call = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, call);
                      gimple_call_set_lhs (call, new_temp);
                      gimple_call_set_nothrow (call, true);
                      new_stmt = call;
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                vec_oprnd0
                  = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);

              orig_vargs[i] = vargs[i] = vec_oprnd0;
            }

          if (mask_opno >= 0 && masked_loop_p)
            {
              tree mask = vect_get_loop_mask (gsi, masks, ncopies,
                                              vectype_out, j);
              vargs[mask_opno]
                = prepare_load_store_mask (TREE_TYPE (mask), mask,
                                           vargs[mask_opno], gsi);
            }

          if (cfn == CFN_GOMP_SIMD_LANE)
            {
              tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              /* We don't define any narrowing conditional functions at
                 present.  */
              gcc_assert (mask_opno < 0);
              tree half_res = make_ssa_name (vectype_in);
              gcall *call = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (call, half_res);
              gimple_call_set_nothrow (call, true);
              new_stmt = call;
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              gcall *call;
              if (ifn != IFN_LAST)
                call = gimple_build_call_internal_vec (ifn, vargs);
              else
                call = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (call, new_temp);
              gimple_call_set_nothrow (call, true);
              new_stmt = call;
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      /* We don't define any narrowing conditional functions at present.  */
      gcc_assert (mask_opno < 0);
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  gcall *call;
                  if (ifn != IFN_LAST)
                    call = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    call = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, call);
                  gimple_call_set_lhs (call, new_temp);
                  gimple_call_set_nothrow (call, true);
                  new_stmt = call;
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (is_pattern_stmt_p (stmt_info))
    stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  lhs = gimple_get_lhs (stmt_info->stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt_info->stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
      else
        return;
    }
}
/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}
3737 /* Function vectorizable_simd_clone_call.
3739 Check if STMT performs a function call that can be vectorized
3740 by calling a simd clone of the function.
3741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3746 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3747 gimple
**vec_stmt
, slp_tree slp_node
,
3748 stmt_vector_for_cost
*)
3753 tree vec_oprnd0
= NULL_TREE
;
3754 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
3756 unsigned int nunits
;
3757 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3758 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3759 vec_info
*vinfo
= stmt_info
->vinfo
;
3760 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3761 tree fndecl
, new_temp
;
3762 gimple
*new_stmt
= NULL
;
3764 auto_vec
<simd_call_arg_info
> arginfo
;
3765 vec
<tree
> vargs
= vNULL
;
3767 tree lhs
, rtype
, ratype
;
3768 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
      simd_call_arg_info thisarginfo;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);

	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
	       && tree_fits_shwi_p (iv.step))
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
	unsigned int this_badness = 0;
	if (n->simdclone->simdlen > vf
	    || n->simdclone->nargs != nargs)
	if (n->simdclone->simdlen < vf)
	  this_badness += (exact_log2 (vf)
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	for (i = 0; i < nargs; i++)
	    switch (n->simdclone->args[i].arg_type)
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
		      (n->simdclone->args[i].orig_type,
		       TREE_TYPE (gimple_call_arg (stmt, i))))
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_MASK:
	if (i == (size_t) -1)
	if (n->simdclone->args[i].alignment > arginfo[i].align)
	if (arginfo[i].align)
	  this_badness += (exact_log2 (arginfo[i].align)
			   - exact_log2 (n->simdclone->args[i].alignment));
	if (i == (size_t) -1)
	if (bestn == NULL || this_badness < badness)
	    badness = this_badness;
  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
	if (arginfo[i].vectype == NULL
	    || (simd_clone_subparts (arginfo[i].vectype)
		> bestn->simdclone->simdlen))

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if ((bestn->simdclone->args[i].arg_type
	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	    || (bestn->simdclone->args[i].arg_type
		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
      /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	  rtype = TREE_TYPE (ratype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
      /* Build argument list for the vectorized call.  */
	vargs.create (nargs);

      for (i = 0; i < nargs; i++)
	  unsigned int k, l, m, o;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / simd_clone_subparts (atype);
	      for (m = j * o; m < (j + 1) * o; m++)
		  if (simd_clone_subparts (atype)
		      < simd_clone_subparts (arginfo[i].vectype))
		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (simd_clone_subparts (arginfo[i].vectype)
			   / simd_clone_subparts (atype));
		      gcc_assert ((k & (k - 1)) == 0);
			  = vect_get_vec_def_for_operand (op, stmt);
			vec_oprnd0 = arginfo[i].op;
		      if ((m & (k - 1)) == 0)
			  = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
		      arginfo[i].op = vec_oprnd0;
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  bitsize_int ((m & (k - 1)) * prec));
			= gimple_build_assign (make_ssa_name (atype),
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		      k = (simd_clone_subparts (atype)
			   / simd_clone_subparts (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
			vec_alloc (ctor_elts, k);
		      for (l = 0; l < k; l++)
			  if (m == 0 && l == 0)
			      = vect_get_vec_def_for_operand (op, stmt);
			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
			  arginfo[i].op = vec_oprnd0;
			    CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
			    vargs.safe_push (vec_oprnd0);
		      vec_oprnd0 = build_constructor (atype, ctor_elts);
			= gimple_build_assign (make_ssa_name (atype),
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		  if (arginfo[i].simd_lane_linear)
		      vargs.safe_push (arginfo[i].op);
		  tree phi_res = copy_ssa_name (op);
		  gphi *new_phi = create_phi_node (phi_res, loop->header);
		  set_vinfo_for_stmt (new_phi,
				      new_stmt_vec_info (new_phi, loop_vinfo));
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		    = wi::mul (bestn->simdclone->args[i].linear_step,
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op);
		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  set_vinfo_for_stmt (new_stmt,
				      new_stmt_vec_info (new_stmt, loop_vinfo));
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		    = wi::mul (bestn->simdclone->args[i].linear_step,
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op));
		  new_stmt = gimple_build_assign (new_temp, code,
						  arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  vargs.safe_push (new_temp);
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
      new_stmt = gimple_build_call_vec (fndecl, vargs);
	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
	    new_temp = create_tmp_var (ratype);
	  else if (simd_clone_subparts (vectype)
		   == simd_clone_subparts (rtype))
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	    new_temp = make_ssa_name (rtype, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (simd_clone_subparts (vectype) < nunits)
	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
	      k = nunits / simd_clone_subparts (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t), l * bytes));
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				bitsize_int (prec), bitsize_int (l * prec));
		    = gimple_build_assign (make_ssa_name (vectype), t);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		vect_clobber_variable (stmt, gsi, new_temp);
	  else if (simd_clone_subparts (vectype) > nunits)
	      unsigned int k = (simd_clone_subparts (vectype)
				/ simd_clone_subparts (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
		  for (m = 0; m < o; m++)
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
			= gimple_build_assign (make_ssa_name (rtype), tem);
		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		  vect_clobber_variable (stmt, gsi, new_temp);
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
		= gimple_build_assign (make_ssa_name (vec_dest), t);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      vect_clobber_variable (stmt, gsi, new_temp);

	  STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
	lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
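/* For instance (an illustrative sketch, not tied to a particular target),
   widening a multiplication of two V8HI operands into V4SI results is done
   by calling this function twice, once with CODE == VEC_WIDEN_MULT_LO_EXPR
   and once with CODE == VEC_WIDEN_MULT_HI_EXPR, so that the low and the
   high halves of the inputs each produce one full-width result vector.  */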
vect_gen_widened_results_half (enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
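/* For example (an illustrative sketch): narrowing int elements to char
   elements is a two-step demotion on typical 128-bit targets; four V4SI
   operands are first packed pairwise into two V8HI vectors, and those are
   packed again into one V16QI vector, each step using VEC_PACK_TRUNC_EXPR.  */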
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple *stmt,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)

  tree vop0, vop1, new_tmp, vec_dest;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
	      *prev_stmt_info = vinfo_for_stmt (new_stmt);

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt, vec_dsts, gsi, slp_node,
					     VEC_PACK_TRUNC_EXPR,

  vec_dsts.quick_push (vec_dest);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple *stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)

  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
						 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
	  new_tmp1 = gimple_call_lhs (new_stmt1);
	  new_tmp2 = gimple_call_lhs (new_stmt2);
	  new_tmp1 = gimple_assign_lhs (new_stmt1);
	  new_tmp2 = gimple_assign_lhs (new_stmt2);

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
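/* An illustrative (hypothetical) example of a conversion handled here:

       int a[N];
       double d[N];
       for (int i = 0; i < N; i++)
	 d[i] = (double) a[i];

   With 128-bit vectors this is a widening conversion (modifier == WIDEN
   below): each V4SI operand produces two V2DF results, possibly going
   through an intermediate type when the element sizes differ by more
   than one step.  */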
static bool
vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  unsigned short fltsz;
  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  if (!is_gimple_assign (stmt))

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
	    && INTEGRAL_TYPE_P (rhs_type))
	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
	       && SCALAR_FLOAT_TYPE_P (rhs_type))))

  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && ((INTEGRAL_TYPE_P (lhs_type)
	   && !type_has_mode_precision_p (lhs_type))
	  || (INTEGRAL_TYPE_P (rhs_type)
	      && !type_has_mode_precision_p (rhs_type))))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision unsupported."

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");

  if (op_type == binary_op)
      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
	 OP1.  */
      if (CONSTANT_CLASS_P (op0))
	ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
	ok = vect_is_simple_use (op1, vinfo, &dt[1]);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
    gcc_assert (vectype_in);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

  if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "can't convert between boolean and non "
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_out, nunits_in))
  else if (multiple_p (nunits_out, nunits_in))
    gcc_checking_assert (multiple_p (nunits_in, nunits_out));

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  else if (modifier == NARROW)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  bool found_mode = false;
  scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
  scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
  opt_scalar_mode rhs_mode_iter;
  /* Supportable by target?  */
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      if (supportable_convert_operation (code, vectype_out, vectype_in,
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");

      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));

      if (code != FLOAT_EXPR
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))

      fltsz = GET_MODE_SIZE (lhs_mode);
      FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
	  rhs_mode = rhs_mode_iter.require ();
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;

      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
      if (code != FIX_TRUNC_EXPR
	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
  if (!vec_stmt) /* transformation not required.  */
      DUMP_VECT_SCOPE ("vectorizable_conversion");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
      else if (modifier == NARROW)
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
      interm_types.release ();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);
  if (op_type == binary_op)
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	  vec_dest = vect_create_destination_var (scalar_dest,
	  vec_dsts.quick_push (vec_dest);

    vec_dest = vect_create_destination_var (scalar_dest,
					    ? vectype_out : cvt_type);

  if (modifier == WIDEN)
      vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
      if (op_type == binary_op)
	vec_oprnds1.create (1);
  else if (modifier == NARROW)
    vec_oprnds0.create (
      2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
      for (j = 0; j < ncopies; j++)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	      if (code == WIDEN_LSHIFT_EXPR)
		  /* Store vec_oprnd1 for every vector stmt to be created
		     for SLP_NODE.  We check during the analysis that all
		     the shift arguments are the same.  */
		  for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
		    vec_oprnds1.quick_push (vec_oprnd1);

		  vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
		vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				   &vec_oprnds1, slp_node);
	      vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
		  vec_oprnds1.quick_push (vec_oprnd1);
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
		  if (codecvt1 == CALL_EXPR)
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		new_stmt = SSA_NAME_DEF_STMT (vop0);

		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);

	  /* Arguments are ready.  Create the new vector stmts.  */
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
		if (codecvt1 == CALL_EXPR)
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
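/* An illustrative (hypothetical) example of what is handled here:

       unsigned int a[N];
       int b[N];
       for (int i = 0; i < N; i++)
	 b[i] = (int) a[i];

   The conversion changes neither the number of vector elements nor the
   vector size, so it is vectorized as a plain copy wrapped in a
   VIEW_CONVERT_EXPR rather than as a real conversion.  */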
static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  vec<tree> vec_oprnds = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 booleans.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "

  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready. create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);

  vec_oprnds.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */
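/* For example (a hypothetical use), a caller such as the pattern
   recognizer can ask vect_supportable_shift (RSHIFT_EXPR, itype) to find
   out whether right shifts of ITYPE can be vectorized at all before it
   commits to generating a pattern statement that needs them.  */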
bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
  machine_mode vec_mode;

  vectype = get_vectype_for_scalar_type (scalar_type);

  optab = optab_for_tree_code (code, vectype, optab_scalar);
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
      optab = optab_for_tree_code (code, vectype, optab_vector);
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
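/* An illustrative (hypothetical) example of the two forms handled here:

       for (int i = 0; i < N; i++)
	 a[i] = b[i] << 3;        // loop-invariant amount: vector/scalar shift

       for (int i = 0; i < N; i++)
	 a[i] = b[i] << c[i];     // variable amount: vector/vector shift

   The analysis below chooses between the optab_scalar and optab_vector
   shift optabs depending on whether the shift amount is invariant.  */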
static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    gcc_assert (vectype);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a vector shift.  */
	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (dt[1] == vect_internal_def)
	  gimple *def = SSA_NAME_DEF_STMT (op1);
	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
	    scalar_shift_arg = false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
      optab = optab_for_tree_code (code, vectype, optab_scalar);
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      if (dt[1] == vect_constant_def)
		op1 = fold_convert (TREE_TYPE (vectype), op1);
	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
		       && TYPE_MODE (TREE_TYPE (vectype))
			  != TYPE_MODE (TREE_TYPE (op1)))
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "unusable type for last operand in"
				     " vector/vector shift/rotate.\n");
		  if (vec_stmt && !slp_node)
		      op1 = fold_convert (TREE_TYPE (vectype), op1);
		      op1 = vect_init_vector (stmt, op1,
					      TREE_TYPE (vectype), NULL);
  /* Supportable by target?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	      && !vect_worthwhile_without_simd_p (vinfo, code)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");

  /* Worthwhile without SIMD support?  Check only during analysis.  */
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");

  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
	  if (scalar_shift_arg)
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);

	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);

  vec_oprnds0.release ();
  vec_oprnds1.release ();
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
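/* An illustrative (hypothetical) example of a statement handled here:

       for (int i = 0; i < N; i++)
	 c[i] = a[i] + b[i];

   The addition is a simple binary operation on vectors of the same
   element type; unary and ternary operations are handled the same way
   with one or three vector operands.  */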
static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
  if (code == POINTER_DIFF_EXPR)

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	  vectype = vectype_out;
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    gcc_assert (vectype);
      if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))

  if (op_type == binary_op || op_type == ternary_op)
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &dt[1]))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
  if (op_type == ternary_op)
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &dt[2]))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
      optab = optab_for_tree_code (code, vectype, optab_default);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);

  if (!target_support_p)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vect_worthwhile_without_simd_p (vinfo, code))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");

  if (!vec_stmt) /* transformation not required.  */
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5986 if (dump_enabled_p ())
5987 dump_printf_loc (MSG_NOTE
, vect_location
,
5988 "transform binary/unary operation.\n");
  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
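  /* For instance, for a scalar statement computing p - q with p and q of
     type int *, the subtraction is carried out in VECTYPE, a vector of
     unsigned pointer-sized elements, while the ptrdiff_t results live in
     the signed VECTYPE_OUT; the VIEW_CONVERT_EXPR emitted below bridges
     the two.  */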
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else if (op_type == ternary_op)
            {
              if (slp_node)
                {
                  auto_vec<tree> ops(3);
                  ops.quick_push (op0);
                  ops.quick_push (op1);
                  ops.quick_push (op2);
                  auto_vec<vec<tree> > vec_defs(3);
                  vect_get_slp_defs (ops, slp_node, &vec_defs);
                  vec_oprnds0 = vec_defs[0];
                  vec_oprnds1 = vec_defs[1];
                  vec_oprnds2 = vec_defs[2];
                }
              else
                {
                  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                                     NULL);
                  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
                                     NULL);
                }
            }
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (vec_cvt_dest)
            {
              new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
              new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
                                              new_temp);
              new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
            }
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
        {
          SET_DECL_ALIGN (base_decl, align_base_to);
          DECL_USER_ALIGN (base_decl) = 1;
        }
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
          != get_alias_set (DR_REF (next_dr)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "conflicting alias set types.\n");
          return ptr_type_node;
        }
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}
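/* Note that ptr_type_node has alias set zero, so a group whose members
   disagree on their alias sets is rewritten with an alias type that
   conservatively conflicts with everything instead of being rejected.  */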
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                    slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
6213 tree vec_oprnd
= NULL_TREE
;
6214 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6215 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6217 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6218 struct loop
*loop
= NULL
;
6219 machine_mode vec_mode
;
6221 enum dr_alignment_support alignment_support_scheme
;
6222 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6223 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6224 stmt_vec_info prev_stmt_info
= NULL
;
6225 tree dataref_ptr
= NULL_TREE
;
6226 tree dataref_offset
= NULL_TREE
;
6227 gimple
*ptr_incr
= NULL
;
6230 gimple
*next_stmt
, *first_stmt
;
6232 unsigned int group_size
, i
;
6233 vec
<tree
> oprnds
= vNULL
;
6234 vec
<tree
> result_chain
= vNULL
;
6236 tree offset
= NULL_TREE
;
6237 vec
<tree
> vec_oprnds
= vNULL
;
6238 bool slp
= (slp_node
!= NULL
);
6239 unsigned int vec_num
;
6240 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6241 vec_info
*vinfo
= stmt_info
->vinfo
;
6243 gather_scatter_info gs_info
;
6246 vec_load_store_type vls_type
;
6249 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6252 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6256 /* Is vectorizable store? */
6258 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6259 if (is_gimple_assign (stmt
))
6261 tree scalar_dest
= gimple_assign_lhs (stmt
);
6262 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6263 && is_pattern_stmt_p (stmt_info
))
6264 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6265 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6266 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6267 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6268 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6269 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6270 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6271 && TREE_CODE (scalar_dest
) != MEM_REF
)
6276 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
6277 if (!call
|| !gimple_call_internal_p (call
))
6280 internal_fn ifn
= gimple_call_internal_fn (call
);
6281 if (!internal_store_fn_p (ifn
))
6284 if (slp_node
!= NULL
)
6286 if (dump_enabled_p ())
6287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6288 "SLP of masked stores not supported.\n");
6292 int mask_index
= internal_fn_mask_index (ifn
);
6293 if (mask_index
>= 0)
6295 mask
= gimple_call_arg (call
, mask_index
);
6296 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
6302 op
= vect_get_store_rhs (stmt
);
6304 /* Cannot have hybrid store SLP -- that would mean storing to the
6305 same location twice. */
6306 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6308 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6309 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6313 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6314 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6319 /* Multiple types in SLP are handled by creating the appropriate number of
6320 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6325 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6327 gcc_assert (ncopies
>= 1);
6329 /* FORNOW. This restriction should be relaxed. */
6330 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6334 "multiple types in nested loop.\n");
6338 if (!vect_check_store_rhs (stmt
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6341 elem_type
= TREE_TYPE (vectype
);
6342 vec_mode
= TYPE_MODE (vectype
);
6344 if (!STMT_VINFO_DATA_REF (stmt_info
))
6347 vect_memory_access_type memory_access_type
;
6348 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, vls_type
, ncopies
,
6349 &memory_access_type
, &gs_info
))
6354 if (memory_access_type
== VMAT_CONTIGUOUS
)
6356 if (!VECTOR_MODE_P (vec_mode
)
6357 || !can_vec_mask_load_store_p (vec_mode
,
6358 TYPE_MODE (mask_vectype
), false))
6361 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6362 && (memory_access_type
!= VMAT_GATHER_SCATTER
|| gs_info
.decl
))
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6366 "unsupported access type for masked store.\n");
6372 /* FORNOW. In some cases can vectorize even if data-type not supported
6373 (e.g. - array initialization with 0). */
6374 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6378 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6379 && memory_access_type
!= VMAT_GATHER_SCATTER
6380 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6383 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6384 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6385 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6391 group_size
= vec_num
= 1;
6394 if (!vec_stmt
) /* transformation not required. */
6396 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6399 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6400 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6401 memory_access_type
, &gs_info
);
6403 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6404 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
6405 vls_type
, slp_node
, cost_vec
);
6408 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6412 ensure_base_align (dr
);
6414 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6416 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6417 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6418 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6419 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
6420 edge pe
= loop_preheader_edge (loop
);
6423 enum { NARROW
, NONE
, WIDEN
} modifier
;
6424 poly_uint64 scatter_off_nunits
6425 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6427 if (known_eq (nunits
, scatter_off_nunits
))
6429 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6433 /* Currently gathers and scatters are only supported for
6434 fixed-length vectors. */
6435 unsigned int count
= scatter_off_nunits
.to_constant ();
6436 vec_perm_builder
sel (count
, count
, 1);
6437 for (i
= 0; i
< (unsigned int) count
; ++i
)
6438 sel
.quick_push (i
| (count
/ 2));
6440 vec_perm_indices
indices (sel
, 1, count
);
6441 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6443 gcc_assert (perm_mask
!= NULL_TREE
);
6445 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6449 /* Currently gathers and scatters are only supported for
6450 fixed-length vectors. */
6451 unsigned int count
= nunits
.to_constant ();
6452 vec_perm_builder
sel (count
, count
, 1);
6453 for (i
= 0; i
< (unsigned int) count
; ++i
)
6454 sel
.quick_push (i
| (count
/ 2));
6456 vec_perm_indices
indices (sel
, 2, count
);
6457 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6458 gcc_assert (perm_mask
!= NULL_TREE
);
6464 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6465 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6466 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6467 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6468 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6469 scaletype
= TREE_VALUE (arglist
);
6471 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6472 && TREE_CODE (rettype
) == VOID_TYPE
);
6474 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6475 if (!is_gimple_min_invariant (ptr
))
6477 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6478 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6479 gcc_assert (!new_bb
);
6482 /* Currently we support only unconditional scatter stores,
6483 so mask should be all ones. */
6484 mask
= build_int_cst (masktype
, -1);
6485 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6487 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6489 prev_stmt_info
= NULL
;
6490 for (j
= 0; j
< ncopies
; ++j
)
6495 = vect_get_vec_def_for_operand (op
, stmt
);
6497 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt
);
6499 else if (modifier
!= NONE
&& (j
& 1))
6501 if (modifier
== WIDEN
)
6504 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6505 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6508 else if (modifier
== NARROW
)
6510 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6513 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6522 = vect_get_vec_def_for_stmt_copy (rhs_dt
, vec_oprnd1
);
6524 = vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
6528 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6530 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6531 TYPE_VECTOR_SUBPARTS (srctype
)));
6532 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6533 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6534 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6535 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6539 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6541 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6542 TYPE_VECTOR_SUBPARTS (idxtype
)));
6543 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6544 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6545 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6546 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6551 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6553 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6555 if (prev_stmt_info
== NULL
)
6556 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6558 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6559 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6564 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6566 gimple
*group_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6567 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt
))++;
6573 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
6575 /* We vectorize all the stmts of the interleaving group when we
6576 reach the last stmt in the group. */
6577 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
6578 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
))
6587 grouped_store
= false;
6588 /* VEC_NUM is the number of vect stmts to be created for this
6590 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6591 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6592 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
6593 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6594 op
= vect_get_store_rhs (first_stmt
);
6597 /* VEC_NUM is the number of vect stmts to be created for this
6599 vec_num
= group_size
;
6601 ref_type
= get_group_alias_ptr_type (first_stmt
);
6604 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
6606 if (dump_enabled_p ())
6607 dump_printf_loc (MSG_NOTE
, vect_location
,
6608 "transform store. ncopies = %d\n", ncopies
);
6610 if (memory_access_type
== VMAT_ELEMENTWISE
6611 || memory_access_type
== VMAT_STRIDED_SLP
)
6613 gimple_stmt_iterator incr_gsi
;
6619 tree stride_base
, stride_step
, alias_off
;
6622 /* Checked by get_load_store_type. */
6623 unsigned int const_nunits
= nunits
.to_constant ();
6625 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6626 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
6629 = fold_build_pointer_plus
6630 (DR_BASE_ADDRESS (first_dr
),
6631 size_binop (PLUS_EXPR
,
6632 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6633 convert_to_ptrofftype (DR_INIT (first_dr
))));
6634 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
         */
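      /* For instance, with nunits == 4 and stride == 3, each vectorized rhs
         yields the four scalar stores array[j], array[j + 3], array[j + 6]
         and array[j + 9], and j then advances by VF * 3 before the next
         group of stores.  */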
6654 unsigned nstores
= const_nunits
;
6656 tree ltype
= elem_type
;
6657 tree lvectype
= vectype
;
6660 if (group_size
< const_nunits
6661 && const_nunits
% group_size
== 0)
6663 nstores
= const_nunits
/ group_size
;
6665 ltype
= build_vector_type (elem_type
, group_size
);
6668 /* First check if vec_extract optab doesn't support extraction
6669 of vector elts directly. */
6670 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6672 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6673 || !VECTOR_MODE_P (vmode
)
6674 || !targetm
.vector_mode_supported_p (vmode
)
6675 || (convert_optab_handler (vec_extract_optab
,
6676 TYPE_MODE (vectype
), vmode
)
6677 == CODE_FOR_nothing
))
6679 /* Try to avoid emitting an extract of vector elements
6680 by performing the extracts using an integer type of the
6681 same size, extracting from a vector of those and then
6682 re-interpreting it as the original vector type if
6685 = group_size
* GET_MODE_BITSIZE (elmode
);
6686 elmode
= int_mode_for_size (lsize
, 0).require ();
6687 unsigned int lnunits
= const_nunits
/ group_size
;
6688 /* If we can't construct such a vector fall back to
6689 element extracts from the original vector type and
6690 element size stores. */
6691 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6692 && VECTOR_MODE_P (vmode
)
6693 && targetm
.vector_mode_supported_p (vmode
)
6694 && (convert_optab_handler (vec_extract_optab
,
6696 != CODE_FOR_nothing
))
6700 ltype
= build_nonstandard_integer_type (lsize
, 1);
6701 lvectype
= build_vector_type (ltype
, nstores
);
6703 /* Else fall back to vector extraction anyway.
6704 Fewer stores are more important than avoiding spilling
6705 of the vector we extract from. Compared to the
6706 construction case in vectorizable_load no store-forwarding
6707 issue exists here for reasonable archs. */
6710 else if (group_size
>= const_nunits
6711 && group_size
% const_nunits
== 0)
6714 lnel
= const_nunits
;
6718 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6719 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6722 ivstep
= stride_step
;
6723 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6724 build_int_cst (TREE_TYPE (ivstep
), vf
));
6726 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6728 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6729 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6730 create_iv (stride_base
, ivstep
, NULL
,
6731 loop
, &incr_gsi
, insert_after
,
6733 incr
= gsi_stmt (incr_gsi
);
6734 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6736 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6738 prev_stmt_info
= NULL
;
6739 alias_off
= build_int_cst (ref_type
, 0);
6740 next_stmt
= first_stmt
;
6741 for (g
= 0; g
< group_size
; g
++)
6743 running_off
= offvar
;
6746 tree size
= TYPE_SIZE_UNIT (ltype
);
6747 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6749 tree newoff
= copy_ssa_name (running_off
, NULL
);
6750 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6752 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6753 running_off
= newoff
;
6755 unsigned int group_el
= 0;
6756 unsigned HOST_WIDE_INT
6757 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6758 for (j
= 0; j
< ncopies
; j
++)
6760 /* We've set op and dt above, from vect_get_store_rhs,
6761 and first_stmt == stmt. */
6766 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
6768 vec_oprnd
= vec_oprnds
[0];
6772 op
= vect_get_store_rhs (next_stmt
);
6773 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6779 vec_oprnd
= vec_oprnds
[j
];
6782 vect_is_simple_use (op
, vinfo
, &rhs_dt
);
6783 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
,
6787 /* Pun the vector to extract from if necessary. */
6788 if (lvectype
!= vectype
)
6790 tree tem
= make_ssa_name (lvectype
);
6792 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6793 lvectype
, vec_oprnd
));
6794 vect_finish_stmt_generation (stmt
, pun
, gsi
);
6797 for (i
= 0; i
< nstores
; i
++)
6799 tree newref
, newoff
;
6800 gimple
*incr
, *assign
;
6801 tree size
= TYPE_SIZE (ltype
);
6802 /* Extract the i'th component. */
6803 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6804 bitsize_int (i
), size
);
6805 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6808 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6812 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6814 newref
= build2 (MEM_REF
, ltype
,
6815 running_off
, this_off
);
6816 vect_copy_ref_info (newref
, DR_REF (first_dr
));
6818 /* And store it to *running_off. */
6819 assign
= gimple_build_assign (newref
, elem
);
6820 vect_finish_stmt_generation (stmt
, assign
, gsi
);
6824 || group_el
== group_size
)
6826 newoff
= copy_ssa_name (running_off
, NULL
);
6827 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6828 running_off
, stride_step
);
6829 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6831 running_off
= newoff
;
6834 if (g
== group_size
- 1
6837 if (j
== 0 && i
== 0)
6838 STMT_VINFO_VEC_STMT (stmt_info
)
6839 = *vec_stmt
= assign
;
6841 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
6842 prev_stmt_info
= vinfo_for_stmt (assign
);
6846 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6851 vec_oprnds
.release ();
6855 auto_vec
<tree
> dr_chain (group_size
);
6856 oprnds
.create (group_size
);
6858 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6859 gcc_assert (alignment_support_scheme
);
6860 vec_loop_masks
*loop_masks
6861 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6862 ? &LOOP_VINFO_MASKS (loop_vinfo
)
6864 /* Targets with store-lane instructions must not require explicit
6865 realignment. vect_supportable_dr_alignment always returns either
6866 dr_aligned or dr_unaligned_supported for masked operations. */
6867 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6870 || alignment_support_scheme
== dr_aligned
6871 || alignment_support_scheme
== dr_unaligned_supported
);
6873 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6874 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6875 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6878 tree vec_offset
= NULL_TREE
;
6879 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6881 aggr_type
= NULL_TREE
;
6884 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
6886 aggr_type
= elem_type
;
6887 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
6888 &bump
, &vec_offset
);
6892 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6893 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6895 aggr_type
= vectype
;
6896 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
6900 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
6941 prev_stmt_info
= NULL
;
6942 tree vec_mask
= NULL_TREE
;
6943 for (j
= 0; j
< ncopies
; j
++)
6950 /* Get vectorized arguments for SLP_NODE. */
6951 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
6954 vec_oprnd
= vec_oprnds
[0];
6958 /* For interleaved stores we collect vectorized defs for all the
6959 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6960 used as an input to vect_permute_store_chain(), and OPRNDS as
6961 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6963 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6964 OPRNDS are of size 1. */
6965 next_stmt
= first_stmt
;
6966 for (i
= 0; i
< group_size
; i
++)
6968 /* Since gaps are not supported for interleaved stores,
6969 DR_GROUP_SIZE is the exact number of stmts in the chain.
6970 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6971 there is no interleaving, DR_GROUP_SIZE is 1, and only one
6972 iteration of the loop will be executed. */
6973 op
= vect_get_store_rhs (next_stmt
);
6974 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
6975 dr_chain
.quick_push (vec_oprnd
);
6976 oprnds
.quick_push (vec_oprnd
);
6977 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6980 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
6984 /* We should have catched mismatched types earlier. */
6985 gcc_assert (useless_type_conversion_p (vectype
,
6986 TREE_TYPE (vec_oprnd
)));
6987 bool simd_lane_access_p
6988 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6989 if (simd_lane_access_p
6990 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6991 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6992 && integer_zerop (DR_OFFSET (first_dr
))
6993 && integer_zerop (DR_INIT (first_dr
))
6994 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6995 get_alias_set (TREE_TYPE (ref_type
))))
6997 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6998 dataref_offset
= build_int_cst (ref_type
, 0);
7001 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7003 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
7004 &dataref_ptr
, &vec_offset
);
7009 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
7010 simd_lane_access_p
? loop
: NULL
,
7011 offset
, &dummy
, gsi
, &ptr_incr
,
7012 simd_lane_access_p
, &inv_p
,
7014 gcc_assert (bb_vinfo
|| !inv_p
);
7018 /* For interleaved stores we created vectorized defs for all the
7019 defs stored in OPRNDS in the previous iteration (previous copy).
7020 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7021 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7023 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7024 OPRNDS are of size 1. */
7025 for (i
= 0; i
< group_size
; i
++)
7028 vect_is_simple_use (op
, vinfo
, &rhs_dt
);
7029 vec_oprnd
= vect_get_vec_def_for_stmt_copy (rhs_dt
, op
);
7030 dr_chain
[i
] = vec_oprnd
;
7031 oprnds
[i
] = vec_oprnd
;
7034 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
7037 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7038 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7039 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
7042 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7046 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7050 /* Get an array into which we can store the individual vectors. */
7051 vec_array
= create_vector_array (vectype
, vec_num
);
7053 /* Invalidate the current contents of VEC_ARRAY. This should
7054 become an RTL clobber too, which prevents the vector registers
7055 from being upward-exposed. */
7056 vect_clobber_variable (stmt
, gsi
, vec_array
);
7058 /* Store the individual vectors into the array. */
7059 for (i
= 0; i
< vec_num
; i
++)
7061 vec_oprnd
= dr_chain
[i
];
7062 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
7065 tree final_mask
= NULL
;
7067 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7070 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7077 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7079 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7080 tree alias_ptr
= build_int_cst (ref_type
, align
);
7081 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7082 dataref_ptr
, alias_ptr
,
7083 final_mask
, vec_array
);
7088 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7089 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7090 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7092 gimple_call_set_lhs (call
, data_ref
);
7094 gimple_call_set_nothrow (call
, true);
7096 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7098 /* Record that VEC_ARRAY is now dead. */
7099 vect_clobber_variable (stmt
, gsi
, vec_array
);
7107 result_chain
.create (group_size
);
7109 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
7113 next_stmt
= first_stmt
;
7114 for (i
= 0; i
< vec_num
; i
++)
7116 unsigned align
, misalign
;
7118 tree final_mask
= NULL_TREE
;
7120 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7122 vectype
, vec_num
* j
+ i
);
7124 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7127 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7129 tree scale
= size_int (gs_info
.scale
);
7132 call
= gimple_build_call_internal
7133 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7134 scale
, vec_oprnd
, final_mask
);
7136 call
= gimple_build_call_internal
7137 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7139 gimple_call_set_nothrow (call
, true);
7141 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7146 /* Bump the vector pointer. */
7147 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7151 vec_oprnd
= vec_oprnds
[i
];
7152 else if (grouped_store
)
7153 /* For grouped stores vectorized defs are interleaved in
7154 vect_permute_store_chain(). */
7155 vec_oprnd
= result_chain
[i
];
7157 align
= DR_TARGET_ALIGNMENT (first_dr
);
7158 if (aligned_access_p (first_dr
))
7160 else if (DR_MISALIGNMENT (first_dr
) == -1)
7162 align
= dr_alignment (vect_dr_behavior (first_dr
));
7166 misalign
= DR_MISALIGNMENT (first_dr
);
7167 if (dataref_offset
== NULL_TREE
7168 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7169 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7172 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7174 tree perm_mask
= perm_mask_for_reverse (vectype
);
7176 = vect_create_destination_var (vect_get_store_rhs (stmt
),
7178 tree new_temp
= make_ssa_name (perm_dest
);
7180 /* Generate the permute statement. */
7182 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7183 vec_oprnd
, perm_mask
);
7184 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
7186 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7187 vec_oprnd
= new_temp
;
7190 /* Arguments are ready. Create the new vector stmt. */
7193 align
= least_bit_hwi (misalign
| align
);
7194 tree ptr
= build_int_cst (ref_type
, align
);
7196 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7198 final_mask
, vec_oprnd
);
7199 gimple_call_set_nothrow (call
, true);
7204 data_ref
= fold_build2 (MEM_REF
, vectype
,
7208 : build_int_cst (ref_type
, 0));
7209 if (aligned_access_p (first_dr
))
7211 else if (DR_MISALIGNMENT (first_dr
) == -1)
7212 TREE_TYPE (data_ref
)
7213 = build_aligned_type (TREE_TYPE (data_ref
),
7214 align
* BITS_PER_UNIT
);
7216 TREE_TYPE (data_ref
)
7217 = build_aligned_type (TREE_TYPE (data_ref
),
7218 TYPE_ALIGN (elem_type
));
7219 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7220 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
7222 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7227 next_stmt
= DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
7235 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7237 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7238 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7243 result_chain
.release ();
7244 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}
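/* Usage sketch (illustrative only; not code from this file): building a
   mask that reverses a 4-element vector with the same helpers the
   gather/scatter code above uses:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The checked variant asserts that the target can perform the permutation;
   callers that want to fall back gracefully should test
   can_vec_perm_const_p themselves and use vect_gen_perm_mask_any.  */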
/* Given a vector variable X and Y, that was generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt);
  if (TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance,
                   stmt_vector_for_cost *cost_vec)
{
7372 tree vec_dest
= NULL
;
7373 tree data_ref
= NULL
;
7374 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7375 stmt_vec_info prev_stmt_info
;
7376 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7377 struct loop
*loop
= NULL
;
7378 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
7379 bool nested_in_vect_loop
= false;
7380 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
7384 gimple
*new_stmt
= NULL
;
7386 enum dr_alignment_support alignment_support_scheme
;
7387 tree dataref_ptr
= NULL_TREE
;
7388 tree dataref_offset
= NULL_TREE
;
7389 gimple
*ptr_incr
= NULL
;
7392 unsigned int group_size
;
7393 poly_uint64 group_gap_adj
;
7394 tree msq
= NULL_TREE
, lsq
;
7395 tree offset
= NULL_TREE
;
7396 tree byte_offset
= NULL_TREE
;
7397 tree realignment_token
= NULL_TREE
;
7399 vec
<tree
> dr_chain
= vNULL
;
7400 bool grouped_load
= false;
7402 gimple
*first_stmt_for_drptr
= NULL
;
7404 bool compute_in_loop
= false;
7405 struct loop
*at_loop
;
7407 bool slp
= (slp_node
!= NULL
);
7408 bool slp_perm
= false;
7409 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7412 gather_scatter_info gs_info
;
7413 vec_info
*vinfo
= stmt_info
->vinfo
;
7415 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7417 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7420 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7424 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7425 if (is_gimple_assign (stmt
))
7427 scalar_dest
= gimple_assign_lhs (stmt
);
7428 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7431 tree_code code
= gimple_assign_rhs_code (stmt
);
7432 if (code
!= ARRAY_REF
7433 && code
!= BIT_FIELD_REF
7434 && code
!= INDIRECT_REF
7435 && code
!= COMPONENT_REF
7436 && code
!= IMAGPART_EXPR
7437 && code
!= REALPART_EXPR
7439 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7444 gcall
*call
= dyn_cast
<gcall
*> (stmt
);
7445 if (!call
|| !gimple_call_internal_p (call
))
7448 internal_fn ifn
= gimple_call_internal_fn (call
);
7449 if (!internal_load_fn_p (ifn
))
7452 scalar_dest
= gimple_call_lhs (call
);
7456 if (slp_node
!= NULL
)
7458 if (dump_enabled_p ())
7459 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7460 "SLP of masked loads not supported.\n");
7464 int mask_index
= internal_fn_mask_index (ifn
);
7465 if (mask_index
>= 0)
7467 mask
= gimple_call_arg (call
, mask_index
);
7468 if (!vect_check_load_store_mask (stmt
, mask
, &mask_dt
,
7474 if (!STMT_VINFO_DATA_REF (stmt_info
))
7477 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7478 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7482 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7483 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
7484 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7489 /* Multiple types in SLP are handled by creating the appropriate number of
7490 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7495 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7497 gcc_assert (ncopies
>= 1);
7499 /* FORNOW. This restriction should be relaxed. */
7500 if (nested_in_vect_loop
&& ncopies
> 1)
7502 if (dump_enabled_p ())
7503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7504 "multiple types in nested loop.\n");
7508 /* Invalidate assumptions made by dependence analysis when vectorization
7509 on the unrolled body effectively re-orders stmts. */
7511 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7512 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7513 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7515 if (dump_enabled_p ())
7516 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7517 "cannot perform implicit CSE when unrolling "
7518 "with negative dependence distance\n");
7522 elem_type
= TREE_TYPE (vectype
);
7523 mode
= TYPE_MODE (vectype
);
7525 /* FORNOW. In some cases can vectorize even if data-type not supported
7526 (e.g. - data copies). */
7527 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7529 if (dump_enabled_p ())
7530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7531 "Aligned load, but unsupported type.\n");
7535 /* Check if the load is a part of an interleaving chain. */
7536 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7538 grouped_load
= true;
7540 gcc_assert (!nested_in_vect_loop
);
7541 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7543 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7544 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7546 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7549 /* Invalidate assumptions made by dependence analysis when vectorization
7550 on the unrolled body effectively re-orders stmts. */
7551 if (!PURE_SLP_STMT (stmt_info
)
7552 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7553 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7554 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7556 if (dump_enabled_p ())
7557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7558 "cannot perform implicit CSE when performing "
7559 "group loads with negative dependence distance\n");
7563 /* Similarly when the stmt is a load that is both part of a SLP
7564 instance and a loop vectorized stmt via the same-dr mechanism
7565 we have to give up. */
7566 if (DR_GROUP_SAME_DR_STMT (stmt_info
)
7567 && (STMT_SLP_TYPE (stmt_info
)
7568 != STMT_SLP_TYPE (vinfo_for_stmt
7569 (DR_GROUP_SAME_DR_STMT (stmt_info
)))))
7571 if (dump_enabled_p ())
7572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7573 "conflicting SLP types for CSEd load\n");
7580 vect_memory_access_type memory_access_type
;
7581 if (!get_load_store_type (stmt
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7582 &memory_access_type
, &gs_info
))
7587 if (memory_access_type
== VMAT_CONTIGUOUS
)
7589 machine_mode vec_mode
= TYPE_MODE (vectype
);
7590 if (!VECTOR_MODE_P (vec_mode
)
7591 || !can_vec_mask_load_store_p (vec_mode
,
7592 TYPE_MODE (mask_vectype
), true))
7595 else if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7597 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7599 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7600 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7602 if (dump_enabled_p ())
7603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7604 "masked gather with integer mask not"
7609 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7610 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7614 "unsupported access type for masked load.\n");
7619 if (!vec_stmt
) /* transformation not required. */
7622 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7625 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7626 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7627 memory_access_type
, &gs_info
);
7629 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7630 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7631 slp_node_instance
, slp_node
, cost_vec
);
7636 gcc_assert (memory_access_type
7637 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7639 if (dump_enabled_p ())
7640 dump_printf_loc (MSG_NOTE
, vect_location
,
7641 "transform load. ncopies = %d\n", ncopies
);
7645 ensure_base_align (dr
);
7647 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7649 vect_build_gather_load_calls (stmt
, gsi
, vec_stmt
, &gs_info
, mask
,
7654 if (memory_access_type
== VMAT_ELEMENTWISE
7655 || memory_access_type
== VMAT_STRIDED_SLP
)
7657 gimple_stmt_iterator incr_gsi
;
7663 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7664 tree stride_base
, stride_step
, alias_off
;
7665 /* Checked by get_load_store_type. */
7666 unsigned int const_nunits
= nunits
.to_constant ();
7667 unsigned HOST_WIDE_INT cst_offset
= 0;
7669 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7670 gcc_assert (!nested_in_vect_loop
);
7674 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7675 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7682 if (slp
&& grouped_load
)
7684 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7685 ref_type
= get_group_alias_ptr_type (first_stmt
);
7691 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7692 * vect_get_place_in_interleaving_chain (stmt
, first_stmt
));
7694 ref_type
= reference_alias_ptr_type (DR_REF (dr
));
7698 = fold_build_pointer_plus
7699 (DR_BASE_ADDRESS (first_dr
),
7700 size_binop (PLUS_EXPR
,
7701 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
7702 convert_to_ptrofftype (DR_INIT (first_dr
))));
7703 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
         */
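      /* For instance, with nunits == 4 and stride == 2, one vector is built
         from the scalar loads array[j], array[j + 2], array[j + 4] and
         array[j + 6], gathered into vectemp = {tmp1, tmp2, tmp3, tmp4},
         after which j advances by VF * 2.  */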
7721 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7722 build_int_cst (TREE_TYPE (stride_step
), vf
));
7724 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7726 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7727 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7728 create_iv (stride_base
, ivstep
, NULL
,
7729 loop
, &incr_gsi
, insert_after
,
7731 incr
= gsi_stmt (incr_gsi
);
7732 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
7734 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7736 prev_stmt_info
= NULL
;
7737 running_off
= offvar
;
7738 alias_off
= build_int_cst (ref_type
, 0);
7739 int nloads
= const_nunits
;
7741 tree ltype
= TREE_TYPE (vectype
);
7742 tree lvectype
= vectype
;
7743 auto_vec
<tree
> dr_chain
;
7744 if (memory_access_type
== VMAT_STRIDED_SLP
)
7746 if (group_size
< const_nunits
)
7748 /* First check if vec_init optab supports construction from
7749 vector elts directly. */
7750 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7752 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7753 && VECTOR_MODE_P (vmode
)
7754 && targetm
.vector_mode_supported_p (vmode
)
7755 && (convert_optab_handler (vec_init_optab
,
7756 TYPE_MODE (vectype
), vmode
)
7757 != CODE_FOR_nothing
))
7759 nloads
= const_nunits
/ group_size
;
7761 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7765 /* Otherwise avoid emitting a constructor of vector elements
7766 by performing the loads using an integer type of the same
7767 size, constructing a vector of those and then
7768 re-interpreting it as the original vector type.
7769 This avoids a huge runtime penalty due to the general
7770 inability to perform store forwarding from smaller stores
7771 to a larger load. */
7773 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7774 elmode
= int_mode_for_size (lsize
, 0).require ();
7775 unsigned int lnunits
= const_nunits
/ group_size
;
7776 /* If we can't construct such a vector fall back to
7777 element loads of the original vector type. */
7778 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7779 && VECTOR_MODE_P (vmode
)
7780 && targetm
.vector_mode_supported_p (vmode
)
7781 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7782 != CODE_FOR_nothing
))
7786 ltype
= build_nonstandard_integer_type (lsize
, 1);
7787 lvectype
= build_vector_type (ltype
, nloads
);
7794 lnel
= const_nunits
;
7797 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7799 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7800 else if (nloads
== 1)
7805 /* For SLP permutation support we need to load the whole group,
7806 not only the number of vector stmts the permutation result
7810 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7812 unsigned int const_vf
= vf
.to_constant ();
7813 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7814 dr_chain
.create (ncopies
);
7817 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7819 unsigned int group_el
= 0;
7820 unsigned HOST_WIDE_INT
7821 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7822 for (j
= 0; j
< ncopies
; j
++)
7825 vec_alloc (v
, nloads
);
7826 for (i
= 0; i
< nloads
; i
++)
7828 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7829 group_el
* elsz
+ cst_offset
);
7830 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
7831 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
7832 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
7833 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7835 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7836 gimple_assign_lhs (new_stmt
));
7840 || group_el
== group_size
)
7842 tree newoff
= copy_ssa_name (running_off
);
7843 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7844 running_off
, stride_step
);
7845 vect_finish_stmt_generation (stmt
, incr
, gsi
);
7847 running_off
= newoff
;
7853 tree vec_inv
= build_constructor (lvectype
, v
);
7854 new_temp
= vect_init_vector (stmt
, vec_inv
, lvectype
, gsi
);
7855 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7856 if (lvectype
!= vectype
)
7858 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
7860 build1 (VIEW_CONVERT_EXPR
,
7861 vectype
, new_temp
));
7862 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7869 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
7871 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7876 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7878 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7879 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7885 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7886 slp_node_instance
, false, &n_perms
);
7891 if (memory_access_type
== VMAT_GATHER_SCATTER
7892 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
7893 grouped_load
= false;
7897 first_stmt
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7898 group_size
= DR_GROUP_SIZE (vinfo_for_stmt (first_stmt
));
7899 /* For SLP vectorization we directly vectorize a subchain
7900 without permutation. */
7901 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7902 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7903 /* For BB vectorization always use the first stmt to base
7904 the data ref pointer on. */
7906 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7908 /* Check if the chain of loads is already vectorized. */
7909 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
7910 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7911 ??? But we can only do so if there is exactly one
7912 as we have no way to get at the rest. Leave the CSE
7914 ??? With the group load eventually participating
7915 in multiple different permutations (having multiple
7916 slp nodes which refer to the same group) the CSE
7917 is even wrong code. See PR56270. */
7920 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7923 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
7926 /* VEC_NUM is the number of vect stmts to be created for this group. */
7929 grouped_load
= false;
7930 /* For SLP permutation support we need to load the whole group,
7931 not only the number of vector stmts the permutation result
7935 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7937 unsigned int const_vf
= vf
.to_constant ();
7938 unsigned int const_nunits
= nunits
.to_constant ();
7939 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7940 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7944 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7946 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7950 vec_num
= group_size
;
7952 ref_type
= get_group_alias_ptr_type (first_stmt
);
7958 group_size
= vec_num
= 1;
7960 ref_type
= reference_alias_ptr_type (DR_REF (first_dr
));
7963 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
7964 gcc_assert (alignment_support_scheme
);
7965 vec_loop_masks
*loop_masks
7966 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7967 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7969 /* Targets with store-lane instructions must not require explicit
7970 realignment. vect_supportable_dr_alignment always returns either
7971 dr_aligned or dr_unaligned_supported for masked operations. */
7972 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7975 || alignment_support_scheme
== dr_aligned
7976 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
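
  /* Illustrative sketch (not from the original sources; variable names here
     are hypothetical): a later stage that needs the J-th vector copy of a
     scalar def can walk the chain recorded above.  The real lookup is done
     by vect_get_vec_def_for_stmt_copy.

	gimple *copy = STMT_VINFO_VEC_STMT (def_stmt_info);
	for (unsigned int k = 0; k < j; ++k)
	  copy = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (copy));

     i.e. copy 0 is reached through VEC_STMT and every further copy through
     the RELATED_STMT field of the previous one.  */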
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
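
  /* Worked example (not from the original sources): for two interleaved
     loads (group_size == 2) and four-element vectors (nunits == 4) the
     selectors above become concrete:

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, 4, 6 } >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, 5, 7 } >

     so vx5 collects the even (first-in-pair) elements of the loaded group
     and vx6 the odd (second-in-pair) elements.  */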
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */
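
  /* Worked example (not from the original sources): with 16-byte vectors and
     p1 == base + 4, floor(p1) is the aligned address base, so msq holds the
     aligned vector at base and lsq the aligned vector at base + 16;
     realign_load then combines the two, shifted by the 4-byte misalignment
     encoded in realignment_token, to yield the vector that starts at
     base + 4.  */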
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
8081 if (nested_in_vect_loop
8082 && !multiple_p (DR_STEP_ALIGNMENT (dr
),
8083 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8085 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8086 compute_in_loop
= true;
8089 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8090 || alignment_support_scheme
== dr_explicit_realign
)
8091 && !compute_in_loop
)
8093 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
8094 alignment_support_scheme
, NULL_TREE
,
8096 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8098 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8099 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8106 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8107 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8110 tree vec_offset
= NULL_TREE
;
8111 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8113 aggr_type
= NULL_TREE
;
8116 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8118 aggr_type
= elem_type
;
8119 vect_get_strided_load_store_ops (stmt
, loop_vinfo
, &gs_info
,
8120 &bump
, &vec_offset
);
8124 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8125 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8127 aggr_type
= vectype
;
8128 bump
= vect_get_data_ptr_increment (dr
, aggr_type
, memory_access_type
);
8131 tree vec_mask
= NULL_TREE
;
8132 prev_stmt_info
= NULL
;
8133 poly_uint64 group_elt
= 0;
8134 for (j
= 0; j
< ncopies
; j
++)
8136 /* 1. Create the vector or array pointer update chain. */
8139 bool simd_lane_access_p
8140 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8141 if (simd_lane_access_p
8142 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
8143 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
8144 && integer_zerop (DR_OFFSET (first_dr
))
8145 && integer_zerop (DR_INIT (first_dr
))
8146 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8147 get_alias_set (TREE_TYPE (ref_type
)))
8148 && (alignment_support_scheme
== dr_aligned
8149 || alignment_support_scheme
== dr_unaligned_supported
))
8151 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
8152 dataref_offset
= build_int_cst (ref_type
, 0);
8155 else if (first_stmt_for_drptr
8156 && first_stmt
!= first_stmt_for_drptr
)
8159 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
8160 at_loop
, offset
, &dummy
, gsi
,
8161 &ptr_incr
, simd_lane_access_p
,
8162 &inv_p
, byte_offset
, bump
);
8163 /* Adjust the pointer by the difference to first_stmt. */
8164 data_reference_p ptrdr
8165 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
8166 tree diff
= fold_convert (sizetype
,
8167 size_binop (MINUS_EXPR
,
8170 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8173 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8175 vect_get_gather_scatter_ops (loop
, stmt
, &gs_info
,
8176 &dataref_ptr
, &vec_offset
);
8181 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
8182 offset
, &dummy
, gsi
, &ptr_incr
,
8183 simd_lane_access_p
, &inv_p
,
8186 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
,
8192 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8194 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8195 vec_offset
= vect_get_vec_def_for_stmt_copy (gs_info
.offset_dt
,
8198 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8201 vec_mask
= vect_get_vec_def_for_stmt_copy (mask_dt
, vec_mask
);
8204 if (grouped_load
|| slp_perm
)
8205 dr_chain
.create (vec_num
);
8207 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8211 vec_array
= create_vector_array (vectype
, vec_num
);
8213 tree final_mask
= NULL_TREE
;
8215 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8218 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8225 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8227 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8228 tree alias_ptr
= build_int_cst (ref_type
, align
);
8229 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8230 dataref_ptr
, alias_ptr
,
8236 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8237 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8238 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8240 gimple_call_set_lhs (call
, vec_array
);
8241 gimple_call_set_nothrow (call
, true);
8243 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8245 /* Extract each vector into an SSA_NAME. */
8246 for (i
= 0; i
< vec_num
; i
++)
8248 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
8250 dr_chain
.quick_push (new_temp
);
8253 /* Record the mapping between SSA_NAMEs and statements. */
8254 vect_record_grouped_load_vectors (stmt
, dr_chain
);
8256 /* Record that VEC_ARRAY is now dead. */
8257 vect_clobber_variable (stmt
, gsi
, vec_array
);
8261 for (i
= 0; i
< vec_num
; i
++)
8263 tree final_mask
= NULL_TREE
;
8265 && memory_access_type
!= VMAT_INVARIANT
)
8266 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8268 vectype
, vec_num
* j
+ i
);
8270 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8274 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8277 /* 2. Create the vector-load in the loop. */
8278 switch (alignment_support_scheme
)
8281 case dr_unaligned_supported
:
8283 unsigned int align
, misalign
;
8285 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8287 tree scale
= size_int (gs_info
.scale
);
8290 call
= gimple_build_call_internal
8291 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8292 vec_offset
, scale
, final_mask
);
8294 call
= gimple_build_call_internal
8295 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8297 gimple_call_set_nothrow (call
, true);
8299 data_ref
= NULL_TREE
;
8303 align
= DR_TARGET_ALIGNMENT (dr
);
8304 if (alignment_support_scheme
== dr_aligned
)
8306 gcc_assert (aligned_access_p (first_dr
));
8309 else if (DR_MISALIGNMENT (first_dr
) == -1)
8311 align
= dr_alignment (vect_dr_behavior (first_dr
));
8315 misalign
= DR_MISALIGNMENT (first_dr
);
8316 if (dataref_offset
== NULL_TREE
8317 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8318 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8323 align
= least_bit_hwi (misalign
| align
);
8324 tree ptr
= build_int_cst (ref_type
, align
);
8326 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8329 gimple_call_set_nothrow (call
, true);
8331 data_ref
= NULL_TREE
;
8336 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8339 : build_int_cst (ref_type
, 0));
8340 if (alignment_support_scheme
== dr_aligned
)
8342 else if (DR_MISALIGNMENT (first_dr
) == -1)
8343 TREE_TYPE (data_ref
)
8344 = build_aligned_type (TREE_TYPE (data_ref
),
8345 align
* BITS_PER_UNIT
);
8347 TREE_TYPE (data_ref
)
8348 = build_aligned_type (TREE_TYPE (data_ref
),
8349 TYPE_ALIGN (elem_type
));
8353 case dr_explicit_realign
:
8357 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8359 if (compute_in_loop
)
8360 msq
= vect_setup_realignment (first_stmt
, gsi
,
8362 dr_explicit_realign
,
8365 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8366 ptr
= copy_ssa_name (dataref_ptr
);
8368 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8369 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8370 new_stmt
= gimple_build_assign
8371 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8373 (TREE_TYPE (dataref_ptr
),
8374 -(HOST_WIDE_INT
) align
));
8375 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8377 = build2 (MEM_REF
, vectype
, ptr
,
8378 build_int_cst (ref_type
, 0));
8379 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8380 vec_dest
= vect_create_destination_var (scalar_dest
,
8382 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8383 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8384 gimple_assign_set_lhs (new_stmt
, new_temp
);
8385 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
8386 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
8387 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8390 bump
= size_binop (MULT_EXPR
, vs
,
8391 TYPE_SIZE_UNIT (elem_type
));
8392 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8393 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
8394 new_stmt
= gimple_build_assign
8395 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8397 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8398 ptr
= copy_ssa_name (ptr
, new_stmt
);
8399 gimple_assign_set_lhs (new_stmt
, ptr
);
8400 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8402 = build2 (MEM_REF
, vectype
, ptr
,
8403 build_int_cst (ref_type
, 0));
8406 case dr_explicit_realign_optimized
:
8408 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8409 new_temp
= copy_ssa_name (dataref_ptr
);
8411 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8412 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr
);
8413 new_stmt
= gimple_build_assign
8414 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8415 build_int_cst (TREE_TYPE (dataref_ptr
),
8416 -(HOST_WIDE_INT
) align
));
8417 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8419 = build2 (MEM_REF
, vectype
, new_temp
,
8420 build_int_cst (ref_type
, 0));
8426 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8427 /* DATA_REF is null if we've already built the statement. */
8430 vect_copy_ref_info (data_ref
, DR_REF (first_dr
));
8431 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8433 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8434 gimple_set_lhs (new_stmt
, new_temp
);
8435 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8437 /* 3. Handle explicit realignment if necessary/supported.
8439 vec_dest = realign_load (msq, lsq, realignment_token) */
8440 if (alignment_support_scheme
== dr_explicit_realign_optimized
8441 || alignment_support_scheme
== dr_explicit_realign
)
8443 lsq
= gimple_assign_lhs (new_stmt
);
8444 if (!realignment_token
)
8445 realignment_token
= dataref_ptr
;
8446 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8447 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8448 msq
, lsq
, realignment_token
);
8449 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8450 gimple_assign_set_lhs (new_stmt
, new_temp
);
8451 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8453 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8456 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8457 add_phi_arg (phi
, lsq
,
8458 loop_latch_edge (containing_loop
),
8464 /* 4. Handle invariant-load. */
8465 if (inv_p
&& !bb_vinfo
)
8467 gcc_assert (!grouped_load
);
8468 /* If we have versioned for aliasing or the loop doesn't
8469 have any data dependencies that would preclude this,
8470 then we are sure this is a loop invariant load and
8471 thus we can insert it on the preheader edge. */
8472 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8473 && !nested_in_vect_loop
8474 && hoist_defs_of_uses (stmt
, loop
))
8476 if (dump_enabled_p ())
8478 dump_printf_loc (MSG_NOTE
, vect_location
,
8479 "hoisting out of the vectorized "
8481 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
8483 tree tem
= copy_ssa_name (scalar_dest
);
8484 gsi_insert_on_edge_immediate
8485 (loop_preheader_edge (loop
),
8486 gimple_build_assign (tem
,
8488 (gimple_assign_rhs1 (stmt
))));
8489 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
8490 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8491 set_vinfo_for_stmt (new_stmt
,
8492 new_stmt_vec_info (new_stmt
, vinfo
));
8496 gimple_stmt_iterator gsi2
= *gsi
;
8498 new_temp
= vect_init_vector (stmt
, scalar_dest
,
8500 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8504 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8506 tree perm_mask
= perm_mask_for_reverse (vectype
);
8507 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8508 perm_mask
, stmt
, gsi
);
8509 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8512 /* Collect vector loads and later create their permutation in
8513 vect_transform_grouped_load (). */
8514 if (grouped_load
|| slp_perm
)
8515 dr_chain
.quick_push (new_temp
);
8517 /* Store vector loads in the corresponding SLP_NODE. */
8518 if (slp
&& !slp_perm
)
8519 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
	  /* With SLP permutation we load the gaps as well; without it we
	     need to skip the gaps after we manage to fully load all
	     elements.  group_gap_adj is DR_GROUP_SIZE here.  */
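	  /* Worked example (not from the original sources): if the group has
	     DR_GROUP_SIZE == 4 but only 2 of its elements are used,
	     group_gap_adj is 2; once group_elt reaches
	     group_size - group_gap_adj, the data-ref pointer is bumped past
	     the remaining 2 elements so the next copy starts at the
	     following group.  */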
8524 group_elt
+= nunits
;
8525 if (maybe_ne (group_gap_adj
, 0U)
8527 && known_eq (group_elt
, group_size
- group_gap_adj
))
8529 poly_wide_int bump_val
8530 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8532 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8533 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8538 /* Bump the vector pointer to account for a gap or for excess
8539 elements loaded for a permuted SLP load. */
8540 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8542 poly_wide_int bump_val
8543 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8545 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8546 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8551 if (slp
&& !slp_perm
)
8557 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8558 slp_node_instance
, false,
8561 dr_chain
.release ();
8569 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8570 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
8571 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8576 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
8578 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
8579 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
8582 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
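
/* For example (illustration only): for a scalar statement

     x_1 = a_5 < b_7 ? c_2 : d_3;

   the condition a_5 < b_7 is "simple" if a_5 and b_7 are constants,
   invariants or SSA names defined inside the vectorized region; in that
   case *COMP_VECTYPE is the vector type chosen for comparing them, e.g. a
   four-element integer vector type for 32-bit operands and 16-byte
   vectors.  */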
8602 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8603 tree
*comp_vectype
, enum vect_def_type
*dts
,
8607 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8610 if (TREE_CODE (cond
) == SSA_NAME
8611 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
8613 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
8615 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
8620 if (!COMPARISON_CLASS_P (cond
))
8623 lhs
= TREE_OPERAND (cond
, 0);
8624 rhs
= TREE_OPERAND (cond
, 1);
8626 if (TREE_CODE (lhs
) == SSA_NAME
)
8628 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
8631 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8632 || TREE_CODE (lhs
) == FIXED_CST
)
8633 dts
[0] = vect_constant_def
;
8637 if (TREE_CODE (rhs
) == SSA_NAME
)
8639 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
8642 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8643 || TREE_CODE (rhs
) == FIXED_CST
)
8644 dts
[1] = vect_constant_def
;
8648 if (vectype1
&& vectype2
8649 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8650 TYPE_VECTOR_SUBPARTS (vectype2
)))
8653 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8654 /* Invariant comparison. */
8655 if (! *comp_vectype
&& vectype
)
8657 tree scalar_type
= TREE_TYPE (lhs
);
8658 /* If we can widen the comparison to match vectype do so. */
8659 if (INTEGRAL_TYPE_P (scalar_type
)
8660 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8661 TYPE_SIZE (TREE_TYPE (vectype
))))
8662 scalar_type
= build_nonstandard_integer_type
8663 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8664 TYPE_UNSIGNED (scalar_type
));
8665 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
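
/* For example (illustration only), the scalar statement

     x_1 = a_5 < b_7 ? c_2 : d_3;

   is replaced by

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va, vb, vc and vd are the vector defs of the corresponding
   operands.  */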
8685 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8686 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
8687 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
8689 tree scalar_dest
= NULL_TREE
;
8690 tree vec_dest
= NULL_TREE
;
8691 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8692 tree then_clause
, else_clause
;
8693 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8694 tree comp_vectype
= NULL_TREE
;
8695 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8696 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8699 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8700 enum vect_def_type dts
[4]
8701 = {vect_unknown_def_type
, vect_unknown_def_type
,
8702 vect_unknown_def_type
, vect_unknown_def_type
};
8705 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8706 stmt_vec_info prev_stmt_info
= NULL
;
8708 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
8709 vec
<tree
> vec_oprnds0
= vNULL
;
8710 vec
<tree
> vec_oprnds1
= vNULL
;
8711 vec
<tree
> vec_oprnds2
= vNULL
;
8712 vec
<tree
> vec_oprnds3
= vNULL
;
8714 bool masked
= false;
8716 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8719 vect_reduction_type reduction_type
8720 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
8721 if (reduction_type
== TREE_CODE_REDUCTION
)
8723 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8726 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8727 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8731 /* FORNOW: not yet supported. */
8732 if (STMT_VINFO_LIVE_P (stmt_info
))
8734 if (dump_enabled_p ())
8735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8736 "value used after loop.\n");
8741 /* Is vectorizable conditional operation? */
8742 if (!is_gimple_assign (stmt
))
8745 code
= gimple_assign_rhs_code (stmt
);
8747 if (code
!= COND_EXPR
)
8750 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8751 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8756 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8758 gcc_assert (ncopies
>= 1);
8759 if (reduc_index
&& ncopies
> 1)
8760 return false; /* FORNOW */
8762 cond_expr
= gimple_assign_rhs1 (stmt
);
8763 then_clause
= gimple_assign_rhs2 (stmt
);
8764 else_clause
= gimple_assign_rhs3 (stmt
);
8766 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8767 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
8771 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
8773 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
8776 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8779 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
8782 masked
= !COMPARISON_CLASS_P (cond_expr
);
8783 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8785 if (vec_cmp_type
== NULL_TREE
)
8788 cond_code
= TREE_CODE (cond_expr
);
8791 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8792 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8795 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
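      /* For example (illustration only): on boolean operands rhs1 > rhs2
	 is rewritten as rhs1 & ~rhs2 (bitop1 = BIT_NOT_EXPR applied to
	 rhs2, bitop2 = BIT_AND_EXPR combining the result with rhs1),
	 while rhs1 == rhs2 becomes ~(rhs1 ^ rhs2), i.e. bitop1 =
	 BIT_XOR_EXPR followed by bitop2 = BIT_NOT_EXPR.  */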
8806 bitop1
= BIT_NOT_EXPR
;
8807 bitop2
= BIT_AND_EXPR
;
8810 bitop1
= BIT_NOT_EXPR
;
8811 bitop2
= BIT_IOR_EXPR
;
8814 bitop1
= BIT_NOT_EXPR
;
8815 bitop2
= BIT_AND_EXPR
;
8816 std::swap (cond_expr0
, cond_expr1
);
8819 bitop1
= BIT_NOT_EXPR
;
8820 bitop2
= BIT_IOR_EXPR
;
8821 std::swap (cond_expr0
, cond_expr1
);
8824 bitop1
= BIT_XOR_EXPR
;
8827 bitop1
= BIT_XOR_EXPR
;
8828 bitop2
= BIT_NOT_EXPR
;
8833 cond_code
= SSA_NAME
;
8838 if (bitop1
!= NOP_EXPR
)
8840 machine_mode mode
= TYPE_MODE (comp_vectype
);
8843 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8844 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8847 if (bitop2
!= NOP_EXPR
)
8849 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8851 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8855 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
8858 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8859 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
8870 vec_oprnds0
.create (1);
8871 vec_oprnds1
.create (1);
8872 vec_oprnds2
.create (1);
8873 vec_oprnds3
.create (1);
8877 scalar_dest
= gimple_assign_lhs (stmt
);
8878 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8879 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8881 /* Handle cond expr. */
8882 for (j
= 0; j
< ncopies
; j
++)
8884 gimple
*new_stmt
= NULL
;
8889 auto_vec
<tree
, 4> ops
;
8890 auto_vec
<vec
<tree
>, 4> vec_defs
;
8893 ops
.safe_push (cond_expr
);
8896 ops
.safe_push (cond_expr0
);
8897 ops
.safe_push (cond_expr1
);
8899 ops
.safe_push (then_clause
);
8900 ops
.safe_push (else_clause
);
8901 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
8902 vec_oprnds3
= vec_defs
.pop ();
8903 vec_oprnds2
= vec_defs
.pop ();
8905 vec_oprnds1
= vec_defs
.pop ();
8906 vec_oprnds0
= vec_defs
.pop ();
8913 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
8915 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
, &dts
[0]);
8920 = vect_get_vec_def_for_operand (cond_expr0
,
8921 stmt
, comp_vectype
);
8922 vect_is_simple_use (cond_expr0
, loop_vinfo
, &dts
[0]);
8925 = vect_get_vec_def_for_operand (cond_expr1
,
8926 stmt
, comp_vectype
);
8927 vect_is_simple_use (cond_expr1
, loop_vinfo
, &dts
[1]);
8929 if (reduc_index
== 1)
8930 vec_then_clause
= reduc_def
;
8933 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8935 vect_is_simple_use (then_clause
, loop_vinfo
, &dts
[2]);
8937 if (reduc_index
== 2)
8938 vec_else_clause
= reduc_def
;
8941 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8943 vect_is_simple_use (else_clause
, loop_vinfo
, &dts
[3]);
8950 = vect_get_vec_def_for_stmt_copy (dts
[0],
8951 vec_oprnds0
.pop ());
8954 = vect_get_vec_def_for_stmt_copy (dts
[1],
8955 vec_oprnds1
.pop ());
8957 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
8958 vec_oprnds2
.pop ());
8959 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
8960 vec_oprnds3
.pop ());
8965 vec_oprnds0
.quick_push (vec_cond_lhs
);
8967 vec_oprnds1
.quick_push (vec_cond_rhs
);
8968 vec_oprnds2
.quick_push (vec_then_clause
);
8969 vec_oprnds3
.quick_push (vec_else_clause
);
8972 /* Arguments are ready. Create the new vector stmt. */
8973 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8975 vec_then_clause
= vec_oprnds2
[i
];
8976 vec_else_clause
= vec_oprnds3
[i
];
8979 vec_compare
= vec_cond_lhs
;
8982 vec_cond_rhs
= vec_oprnds1
[i
];
8983 if (bitop1
== NOP_EXPR
)
8984 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8985 vec_cond_lhs
, vec_cond_rhs
);
8988 new_temp
= make_ssa_name (vec_cmp_type
);
8989 if (bitop1
== BIT_NOT_EXPR
)
8990 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8994 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8996 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
8997 if (bitop2
== NOP_EXPR
)
8998 vec_compare
= new_temp
;
8999 else if (bitop2
== BIT_NOT_EXPR
)
9001 /* Instead of doing ~x ? y : z do x ? z : y. */
9002 vec_compare
= new_temp
;
9003 std::swap (vec_then_clause
, vec_else_clause
);
9007 vec_compare
= make_ssa_name (vec_cmp_type
);
9009 = gimple_build_assign (vec_compare
, bitop2
,
9010 vec_cond_lhs
, new_temp
);
9011 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9015 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9017 if (!is_gimple_val (vec_compare
))
9019 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
9020 new_stmt
= gimple_build_assign (vec_compare_name
,
9022 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9023 vec_compare
= vec_compare_name
;
9025 gcc_assert (reduc_index
== 2);
9026 new_stmt
= gimple_build_call_internal
9027 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
9029 gimple_call_set_lhs (new_stmt
, scalar_dest
);
9030 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
9031 if (stmt
== gsi_stmt (*gsi
))
9032 vect_finish_replace_stmt (stmt
, new_stmt
);
9035 /* In this case we're moving the definition to later in the
9036 block. That doesn't matter because the only uses of the
9037 lhs are in phi statements. */
9038 gimple_stmt_iterator old_gsi
= gsi_for_stmt (stmt
);
9039 gsi_remove (&old_gsi
, true);
9040 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9045 new_temp
= make_ssa_name (vec_dest
);
9046 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
,
9047 vec_compare
, vec_then_clause
,
9049 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9052 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9059 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
9061 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
9063 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
9066 vec_oprnds0
.release ();
9067 vec_oprnds1
.release ();
9068 vec_oprnds2
.release ();
9069 vec_oprnds3
.release ();
/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
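
/* For example (illustration only), the scalar statement

     mask_1 = a_5 < b_7;

   is vectorized into a vector comparison

     vmask = va < vb;

   whose type is the boolean vector type corresponding to the vector type
   of the operands.  */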
9083 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9084 gimple
**vec_stmt
, tree reduc_def
,
9085 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9087 tree lhs
, rhs1
, rhs2
;
9088 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9089 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9090 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9091 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
9093 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9094 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
9098 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9099 stmt_vec_info prev_stmt_info
= NULL
;
9101 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9102 vec
<tree
> vec_oprnds0
= vNULL
;
9103 vec
<tree
> vec_oprnds1
= vNULL
;
9107 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9110 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
9113 mask_type
= vectype
;
9114 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9119 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9121 gcc_assert (ncopies
>= 1);
9122 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9123 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9127 if (STMT_VINFO_LIVE_P (stmt_info
))
9129 if (dump_enabled_p ())
9130 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9131 "value used after loop.\n");
9135 if (!is_gimple_assign (stmt
))
9138 code
= gimple_assign_rhs_code (stmt
);
9140 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9143 rhs1
= gimple_assign_rhs1 (stmt
);
9144 rhs2
= gimple_assign_rhs2 (stmt
);
9146 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
9149 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
9152 if (vectype1
&& vectype2
9153 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9154 TYPE_VECTOR_SUBPARTS (vectype2
)))
9157 vectype
= vectype1
? vectype1
: vectype2
;
9159 /* Invariant comparison. */
9162 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9163 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9166 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9169 /* Can't compare mask and non-mask types. */
9170 if (vectype1
&& vectype2
9171 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
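  /* For example (illustration only): on boolean vectors rhs1 >= rhs2 becomes
     rhs1 | ~rhs2 (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR), and
     rhs1 < rhs2 becomes rhs2 & ~rhs1, which the code below obtains by
     swapping the operands and reusing the GT lowering.  */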
9181 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9183 if (code
== GT_EXPR
)
9185 bitop1
= BIT_NOT_EXPR
;
9186 bitop2
= BIT_AND_EXPR
;
9188 else if (code
== GE_EXPR
)
9190 bitop1
= BIT_NOT_EXPR
;
9191 bitop2
= BIT_IOR_EXPR
;
9193 else if (code
== LT_EXPR
)
9195 bitop1
= BIT_NOT_EXPR
;
9196 bitop2
= BIT_AND_EXPR
;
9197 std::swap (rhs1
, rhs2
);
9198 std::swap (dts
[0], dts
[1]);
9200 else if (code
== LE_EXPR
)
9202 bitop1
= BIT_NOT_EXPR
;
9203 bitop2
= BIT_IOR_EXPR
;
9204 std::swap (rhs1
, rhs2
);
9205 std::swap (dts
[0], dts
[1]);
9209 bitop1
= BIT_XOR_EXPR
;
9210 if (code
== EQ_EXPR
)
9211 bitop2
= BIT_NOT_EXPR
;
9217 if (bitop1
== NOP_EXPR
)
9219 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
9224 machine_mode mode
= TYPE_MODE (vectype
);
9227 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9228 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9231 if (bitop2
!= NOP_EXPR
)
9233 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9234 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9239 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9240 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9241 dts
, ndts
, slp_node
, cost_vec
);
9248 vec_oprnds0
.create (1);
9249 vec_oprnds1
.create (1);
9253 lhs
= gimple_assign_lhs (stmt
);
9254 mask
= vect_create_destination_var (lhs
, mask_type
);
9256 /* Handle cmp expr. */
9257 for (j
= 0; j
< ncopies
; j
++)
9259 gassign
*new_stmt
= NULL
;
9264 auto_vec
<tree
, 2> ops
;
9265 auto_vec
<vec
<tree
>, 2> vec_defs
;
9267 ops
.safe_push (rhs1
);
9268 ops
.safe_push (rhs2
);
9269 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9270 vec_oprnds1
= vec_defs
.pop ();
9271 vec_oprnds0
= vec_defs
.pop ();
9275 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
9276 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
9281 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
9282 vec_oprnds0
.pop ());
9283 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
9284 vec_oprnds1
.pop ());
9289 vec_oprnds0
.quick_push (vec_rhs1
);
9290 vec_oprnds1
.quick_push (vec_rhs2
);
9293 /* Arguments are ready. Create the new vector stmt. */
9294 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9296 vec_rhs2
= vec_oprnds1
[i
];
9298 new_temp
= make_ssa_name (mask
);
9299 if (bitop1
== NOP_EXPR
)
9301 new_stmt
= gimple_build_assign (new_temp
, code
,
9302 vec_rhs1
, vec_rhs2
);
9303 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9307 if (bitop1
== BIT_NOT_EXPR
)
9308 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9310 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9312 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9313 if (bitop2
!= NOP_EXPR
)
9315 tree res
= make_ssa_name (mask
);
9316 if (bitop2
== BIT_NOT_EXPR
)
9317 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9319 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9321 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
9325 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9332 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
9334 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
9336 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
9339 vec_oprnds0
.release ();
9340 vec_oprnds1
.release ();
/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */
9351 can_vectorize_live_stmts (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9352 slp_tree slp_node
, gimple
**vec_stmt
,
9353 stmt_vector_for_cost
*cost_vec
)
9359 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt
)
9361 stmt_vec_info slp_stmt_info
= vinfo_for_stmt (slp_stmt
);
9362 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9363 && !vectorizable_live_operation (slp_stmt
, gsi
, slp_node
, i
,
9364 vec_stmt
, cost_vec
))
9368 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt
))
9369 && !vectorizable_live_operation (stmt
, gsi
, slp_node
, -1, vec_stmt
,
9376 /* Make sure the statement is vectorizable. */
9379 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
,
9380 slp_instance node_instance
, stmt_vector_for_cost
*cost_vec
)
9382 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9383 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9384 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9386 gimple
*pattern_stmt
;
9387 gimple_seq pattern_def_seq
;
9389 if (dump_enabled_p ())
9391 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
9392 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9395 if (gimple_has_volatile_ops (stmt
))
9397 if (dump_enabled_p ())
9398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9399 "not vectorized: stmt has volatile operands\n");
9404 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9406 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9408 gimple_stmt_iterator si
;
9410 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9412 gimple
*pattern_def_stmt
= gsi_stmt (si
);
9413 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
9414 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
9416 /* Analyze def stmt of STMT if it's a pattern stmt. */
9417 if (dump_enabled_p ())
9419 dump_printf_loc (MSG_NOTE
, vect_location
,
9420 "==> examining pattern def statement: ");
9421 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
9424 if (!vect_analyze_stmt (pattern_def_stmt
,
9425 need_to_vectorize
, node
, node_instance
,
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to be the case:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; there we don't analyze pattern stmts separately, since the
     pattern stmts are already part of the SLP instance.  */
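
  /* For example (illustration only), in

	for (i = 0; i < n; ++i)
	  a[i] = b[i] + 1;

     the increment of i and the exit test i < n are used only for loop
     control and addressing, so they are not relevant and are skipped here;
     only the load, the addition and the store are analyzed further.  */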
9446 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
9447 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9448 && !STMT_VINFO_LIVE_P (stmt_info
))
9450 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9452 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9453 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9455 /* Analyze PATTERN_STMT instead of the original stmt. */
9456 stmt
= pattern_stmt
;
9457 stmt_info
= vinfo_for_stmt (pattern_stmt
);
9458 if (dump_enabled_p ())
9460 dump_printf_loc (MSG_NOTE
, vect_location
,
9461 "==> examining pattern statement: ");
9462 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9467 if (dump_enabled_p ())
9468 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9473 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9476 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
9477 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
9479 /* Analyze PATTERN_STMT too. */
9480 if (dump_enabled_p ())
9482 dump_printf_loc (MSG_NOTE
, vect_location
,
9483 "==> examining pattern statement: ");
9484 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
9487 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
,
9488 node_instance
, cost_vec
))
9492 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9494 case vect_internal_def
:
9497 case vect_reduction_def
:
9498 case vect_nested_cycle
:
9499 gcc_assert (!bb_vinfo
9500 && (relevance
== vect_used_in_outer
9501 || relevance
== vect_used_in_outer_by_reduction
9502 || relevance
== vect_used_by_reduction
9503 || relevance
== vect_unused_in_scope
9504 || relevance
== vect_used_only_live
));
9507 case vect_induction_def
:
9508 gcc_assert (!bb_vinfo
);
9511 case vect_constant_def
:
9512 case vect_external_def
:
9513 case vect_unknown_def_type
:
9518 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9520 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
9521 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9522 || (is_gimple_call (stmt
)
9523 && gimple_call_lhs (stmt
) == NULL_TREE
));
9524 *need_to_vectorize
= true;
9527 if (PURE_SLP_STMT (stmt_info
) && !node
)
9529 dump_printf_loc (MSG_NOTE
, vect_location
,
9530 "handled only by SLP analysis\n");
9536 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9537 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9538 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9539 || vectorizable_conversion (stmt
, NULL
, NULL
, node
, cost_vec
)
9540 || vectorizable_shift (stmt
, NULL
, NULL
, node
, cost_vec
)
9541 || vectorizable_operation (stmt
, NULL
, NULL
, node
, cost_vec
)
9542 || vectorizable_assignment (stmt
, NULL
, NULL
, node
, cost_vec
)
9543 || vectorizable_load (stmt
, NULL
, NULL
, node
, node_instance
, cost_vec
)
9544 || vectorizable_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9545 || vectorizable_store (stmt
, NULL
, NULL
, node
, cost_vec
)
9546 || vectorizable_reduction (stmt
, NULL
, NULL
, node
, node_instance
,
9548 || vectorizable_induction (stmt
, NULL
, NULL
, node
, cost_vec
)
9549 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
, cost_vec
)
9550 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
, cost_vec
));
9554 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9555 || vectorizable_conversion (stmt
, NULL
, NULL
, node
, cost_vec
)
9556 || vectorizable_shift (stmt
, NULL
, NULL
, node
, cost_vec
)
9557 || vectorizable_operation (stmt
, NULL
, NULL
, node
, cost_vec
)
9558 || vectorizable_assignment (stmt
, NULL
, NULL
, node
, cost_vec
)
9559 || vectorizable_load (stmt
, NULL
, NULL
, node
, node_instance
,
9561 || vectorizable_call (stmt
, NULL
, NULL
, node
, cost_vec
)
9562 || vectorizable_store (stmt
, NULL
, NULL
, node
, cost_vec
)
9563 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
,
9565 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
,
9571 if (dump_enabled_p ())
9573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9574 "not vectorized: relevant stmt not ");
9575 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
9576 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
9582 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9583 need extra handling, except for vectorizable reductions. */
9585 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9586 && !can_vectorize_live_stmts (stmt
, NULL
, node
, NULL
, cost_vec
))
9588 if (dump_enabled_p ())
9590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9591 "not vectorized: live stmt not supported: ");
9592 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */
9607 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
9608 bool *grouped_store
, slp_tree slp_node
,
9609 slp_instance slp_node_instance
)
9611 bool is_store
= false;
9612 gimple
*vec_stmt
= NULL
;
9613 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
9616 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9617 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
9619 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9620 && nested_in_vect_loop_p
9621 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9624 switch (STMT_VINFO_TYPE (stmt_info
))
9626 case type_demotion_vec_info_type
:
9627 case type_promotion_vec_info_type
:
9628 case type_conversion_vec_info_type
:
9629 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9633 case induc_vec_info_type
:
9634 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9638 case shift_vec_info_type
:
9639 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9643 case op_vec_info_type
:
9644 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9648 case assignment_vec_info_type
:
9649 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9653 case load_vec_info_type
:
9654 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
9655 slp_node_instance
, NULL
);
9659 case store_vec_info_type
:
9660 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9662 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
9668 *grouped_store
= true;
9669 stmt_vec_info group_info
9670 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info
));
9671 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
9678 case condition_vec_info_type
:
9679 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
, NULL
);
9683 case comparison_vec_info_type
:
9684 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
, NULL
);
9688 case call_vec_info_type
:
9689 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9690 stmt
= gsi_stmt (*gsi
);
9693 case call_simd_clone_vec_info_type
:
9694 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
, NULL
);
9695 stmt
= gsi_stmt (*gsi
);
9698 case reduc_vec_info_type
:
9699 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
,
9700 slp_node_instance
, NULL
);
9705 if (!STMT_VINFO_LIVE_P (stmt_info
))
9707 if (dump_enabled_p ())
9708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9709 "stmt not supported.\n");
9714 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9715 This would break hybrid SLP vectorization. */
9717 gcc_assert (!vec_stmt
9718 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
9720 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9721 is being vectorized, but outside the immediately enclosing loop. */
9724 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9725 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9726 || STMT_VINFO_RELEVANT (stmt_info
) ==
9727 vect_used_in_outer_by_reduction
))
9729 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9730 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9731 imm_use_iterator imm_iter
;
9732 use_operand_p use_p
;
9736 if (dump_enabled_p ())
9737 dump_printf_loc (MSG_NOTE
, vect_location
,
9738 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
9743 if (gimple_code (stmt
) == GIMPLE_PHI
)
9744 scalar_dest
= PHI_RESULT (stmt
);
9746 scalar_dest
= gimple_assign_lhs (stmt
);
9748 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9750 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9752 exit_phi
= USE_STMT (use_p
);
9753 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
9758 /* Handle stmts whose DEF is used outside the loop-nest that is
9759 being vectorized. */
9760 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9762 done
= can_vectorize_live_stmts (stmt
, gsi
, slp_node
, &vec_stmt
, NULL
);
9767 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */
9777 vect_remove_stores (gimple
*first_stmt
)
9779 gimple
*next
= first_stmt
;
9781 gimple_stmt_iterator next_si
;
9785 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
9787 tmp
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
9788 if (is_pattern_stmt_p (stmt_info
))
9789 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
9790 /* Free the attached stmt_vec_info and remove the stmt. */
9791 next_si
= gsi_for_stmt (next
);
9792 unlink_stmt_vdef (next
);
9793 gsi_remove (&next_si
, true);
9794 release_defs (next
);
9795 free_stmt_vec_info (next
);
9801 /* Function new_stmt_vec_info.
9803 Create and initialize a new stmt_vec_info struct for STMT. */
9806 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
9809 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
9811 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
9812 STMT_VINFO_STMT (res
) = stmt
;
9814 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
9815 STMT_VINFO_LIVE_P (res
) = false;
9816 STMT_VINFO_VECTYPE (res
) = NULL
;
9817 STMT_VINFO_VEC_STMT (res
) = NULL
;
9818 STMT_VINFO_VECTORIZABLE (res
) = true;
9819 STMT_VINFO_IN_PATTERN_P (res
) = false;
9820 STMT_VINFO_RELATED_STMT (res
) = NULL
;
9821 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
9822 STMT_VINFO_DATA_REF (res
) = NULL
;
9823 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
9824 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res
) = ERROR_MARK
;
9826 if (gimple_code (stmt
) == GIMPLE_PHI
9827 && is_loop_header_bb_p (gimple_bb (stmt
)))
9828 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
9830 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
9832 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
9833 STMT_SLP_TYPE (res
) = loop_vect
;
9834 STMT_VINFO_NUM_SLP_USES (res
) = 0;
9836 res
->first_element
= NULL
; /* GROUP_FIRST_ELEMENT */
9837 res
->next_element
= NULL
; /* GROUP_NEXT_ELEMENT */
9838 res
->size
= 0; /* GROUP_SIZE */
9839 res
->store_count
= 0; /* GROUP_STORE_COUNT */
9840 res
->gap
= 0; /* GROUP_GAP */
9841 res
->same_dr_stmt
= NULL
; /* GROUP_SAME_DR_STMT */
9843 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9844 res
->dr_aux
.misalignment
= DR_MISALIGNMENT_UNINITIALIZED
;
9850 /* Set the current stmt_vec_info vector to V. */
9853 set_stmt_vec_info_vec (vec
<stmt_vec_info
> *v
)
9855 stmt_vec_info_vec
= v
;
9858 /* Free the stmt_vec_info entries in V and release V. */
9861 free_stmt_vec_infos (vec
<stmt_vec_info
> *v
)
9865 FOR_EACH_VEC_ELT (*v
, i
, info
)
9867 free_stmt_vec_info (STMT_VINFO_STMT (info
));
9868 if (v
== stmt_vec_info_vec
)
9869 stmt_vec_info_vec
= NULL
;
9874 /* Free stmt vectorization related info. */
9877 free_stmt_vec_info (gimple
*stmt
)
9879 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
9888 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
9890 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
))
9891 for (gimple_stmt_iterator si
= gsi_start (seq
);
9892 !gsi_end_p (si
); gsi_next (&si
))
9894 gimple
*seq_stmt
= gsi_stmt (si
);
9895 gimple_set_bb (seq_stmt
, NULL
);
9896 tree lhs
= gimple_get_lhs (seq_stmt
);
9897 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9898 release_ssa_name (lhs
);
9899 free_stmt_vec_info (seq_stmt
);
9901 stmt_vec_info patt_info
9902 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
9905 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
9906 gimple_set_bb (patt_stmt
, NULL
);
9907 tree lhs
= gimple_get_lhs (patt_stmt
);
9908 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
9909 release_ssa_name (lhs
);
9910 free_stmt_vec_info (patt_stmt
);
9914 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
9915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
9916 set_vinfo_for_stmt (stmt
, NULL
);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */
9927 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9929 tree orig_scalar_type
= scalar_type
;
9930 scalar_mode inner_mode
;
9931 machine_mode simd_mode
;
9935 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9936 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9939 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
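  /* For example (illustration only): a C "bool" typically has TYPE_PRECISION
     1 but an 8-bit mode, so the element type used for the vector is an
     8-bit unsigned INTEGER_TYPE; an enumeration with a 32-bit mode is
     likewise replaced by a 32-bit INTEGER_TYPE of the same signedness.  */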
9947 if (INTEGRAL_TYPE_P (scalar_type
)
9948 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9949 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9950 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9951 TYPE_UNSIGNED (scalar_type
));
9953 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9954 When the component mode passes the above test simply use a type
9955 corresponding to that mode. The theory is that any use that
9956 would cause problems with this will disable vectorization anyway. */
9957 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9958 && !INTEGRAL_TYPE_P (scalar_type
))
9959 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9961 /* We can't build a vector type of elements with alignment bigger than
9963 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9964 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9965 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
9969 if (scalar_type
== NULL_TREE
)
  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
9974 if (known_eq (size
, 0U))
9975 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9976 else if (!multiple_p (size
, nbytes
, &nunits
)
9977 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9979 /* NOTE: nunits == 1 is allowed to support single element vector types. */
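  /* Worked example (illustration only): for 4-byte ints (nbytes == 4) a
     requested SIZE of 16 bytes gives nunits == 4 and a lookup of the
     corresponding 4-element integer vector mode, while SIZE == 4 gives
     nunits == 1, a single-element vector as noted above.  */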
9980 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9983 vectype
= build_vector_type (scalar_type
, nunits
);
9985 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9986 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
9991 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9992 return build_qualified_type
9993 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9998 poly_uint64 current_vector_size
;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */
10006 get_vectype_for_scalar_type (tree scalar_type
)
10009 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
10010 current_vector_size
);
10012 && known_eq (current_vector_size
, 0U))
10013 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
10017 /* Function get_mask_type_for_scalar_type.
10019 Returns the mask type corresponding to a result of comparison
10020 of vectors of specified SCALAR_TYPE as supported by target. */
10023 get_mask_type_for_scalar_type (tree scalar_type
)
10025 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
10030 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
10031 current_vector_size
);
10034 /* Function get_same_sized_vectype
10036 Returns a vector type corresponding to SCALAR_TYPE of size
10037 VECTOR_TYPE if supported by the target. */
10040 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
10042 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
10043 return build_same_sized_truth_vector_type (vector_type
);
10045 return get_vectype_for_scalar_type_and_size
10046 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
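
/* For example (illustration only): a literal constant is classified as
   vect_constant_def, a function parameter or a value computed before the
   loop as vect_external_def, an SSA name set by a statement inside the
   vectorized region as vect_internal_def, and the PHI result of a
   reduction as vect_reduction_def.  */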
10067 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10068 gimple
**def_stmt_out
)
10071 *def_stmt_out
= NULL
;
10072 *dt
= vect_unknown_def_type
;
10074 if (dump_enabled_p ())
10076 dump_printf_loc (MSG_NOTE
, vect_location
,
10077 "vect_is_simple_use: operand ");
10078 if (TREE_CODE (operand
) == SSA_NAME
10079 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
10080 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
10082 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
10085 if (CONSTANT_CLASS_P (operand
))
10086 *dt
= vect_constant_def
;
10087 else if (is_gimple_min_invariant (operand
))
10088 *dt
= vect_external_def
;
10089 else if (TREE_CODE (operand
) != SSA_NAME
)
10090 *dt
= vect_unknown_def_type
;
10091 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
10092 *dt
= vect_external_def
;
10095 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
10096 if (! vect_stmt_in_region_p (vinfo
, def_stmt
))
10097 *dt
= vect_external_def
;
10100 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (def_stmt
);
10101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo
))
10103 def_stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
10104 stmt_vinfo
= vinfo_for_stmt (def_stmt
);
10106 switch (gimple_code (def_stmt
))
10109 case GIMPLE_ASSIGN
:
10111 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
10114 *dt
= vect_unknown_def_type
;
10119 *def_stmt_out
= def_stmt
;
10122 if (dump_enabled_p ())
10124 dump_printf (MSG_NOTE
, ", type of def: ");
10127 case vect_uninitialized_def
:
10128 dump_printf (MSG_NOTE
, "uninitialized\n");
10130 case vect_constant_def
:
10131 dump_printf (MSG_NOTE
, "constant\n");
10133 case vect_external_def
:
10134 dump_printf (MSG_NOTE
, "external\n");
10136 case vect_internal_def
:
10137 dump_printf (MSG_NOTE
, "internal\n");
10139 case vect_induction_def
:
10140 dump_printf (MSG_NOTE
, "induction\n");
10142 case vect_reduction_def
:
10143 dump_printf (MSG_NOTE
, "reduction\n");
10145 case vect_double_reduction_def
:
10146 dump_printf (MSG_NOTE
, "double reduction\n");
10148 case vect_nested_cycle
:
10149 dump_printf (MSG_NOTE
, "nested cycle\n");
10151 case vect_unknown_def_type
:
10152 dump_printf (MSG_NOTE
, "unknown\n");
10157 if (*dt
== vect_unknown_def_type
)
10159 if (dump_enabled_p ())
10160 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10161 "Unsupported pattern.\n");
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, gimple **def_stmt_out)
{
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_is_simple_use: vectype ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
	  dump_printf (MSG_NOTE, "\n");
	}
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

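/* Note for exposition (an addition, not part of the original sources): when
   this overload hands back *VECTYPE == NULL_TREE, callers usually fall back
   to a vector type chosen from the use site, roughly

       tree op_vectype = NULL_TREE;
       if (!vect_is_simple_use (op, vinfo, &dt, &op_vectype))
	 return false;
       if (!op_vectype)
	 op_vectype = vectype_of_the_using_statement;

   where "vectype_of_the_using_statement" stands for whatever vector type the
   caller has already selected for the statement that consumes OP.  */
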
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow to change the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such an example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
       are known; and
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

bool
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out)
{
  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return true;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: irregular stmt.");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: vector stmt in loop:");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return true;
	    }
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
	}
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }

  if (!nunits_vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: different sized vector "
			   "types in statement, ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     nunits_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
      dump_printf (MSG_NOTE, "\n");

      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return true;
}

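/* Illustrative sketch (an addition for exposition, not part of the original
   sources): for a comparison such as

       flag = a < b;     where a and b are "int" and flag is _Bool

   *STMT_VECTYPE_OUT is set to boolean_type_node (the real mask type is only
   chosen later, once the surrounding statements are known), while
   *NUNITS_VECTYPE_OUT is derived from the compared "int" operands so that
   the statement still contributes to the number-of-units computation.  */
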
/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: unsupported mask\n");
	  return NULL_TREE;
	}
    }
  else
    {
      tree rhs;
      ssa_op_iter iter;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: can't compute mask type "
				   "for statement, ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
				    0);
		}
	      return NULL_TREE;
	    }

	  /* No vectype probably means external definition.
	     Allow it in case there is another operand which
	     allows to determine mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: different sized masks "
				   "types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: mixed mask and "
				   "nonmask vector types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	}

      /* We may compare boolean value loaded as vector of integers.
	 Fix mask_type in such case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* No mask_type should mean loop invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type && dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "not vectorized: can't compute mask type "
		       "for statement, ");
      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);