1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
60 return STMT_VINFO_VECTYPE (stmt_info
);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
68 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
69 basic_block bb
= gimple_bb (stmt
);
70 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
76 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
78 return (bb
->loop_father
== loop
->inner
);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
87 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
88 int misalign
, enum vect_cost_model_location where
)
92 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
93 stmt_info_for_cost si
= { count
, kind
,
94 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
96 body_cost_vec
->safe_push (si
);
98 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
101 return add_stmt_cost (stmt_info
->vinfo
->target_cost_data
,
102 count
, kind
, stmt_info
, misalign
, where
);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple
*stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (vec
<gimple
*> *worklist
, gimple
*stmt
,
184 enum vect_relevant relevant
, bool live_p
,
185 bool used_in_pattern
)
187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
188 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
189 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
190 gimple
*pattern_stmt
;
192 if (dump_enabled_p ())
194 dump_printf_loc (MSG_NOTE
, vect_location
,
195 "mark relevant %d, live %d: ", relevant
, live_p
);
196 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
199 /* If this stmt is an original stmt in a pattern, we might need to mark its
200 related pattern stmt instead of the original stmt. However, such stmts
201 may have their own uses that are not in any pattern, in such cases the
202 stmt itself should be marked. */
203 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
206 if (!used_in_pattern
)
208 imm_use_iterator imm_iter
;
212 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
213 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
215 if (is_gimple_assign (stmt
))
216 lhs
= gimple_assign_lhs (stmt
);
218 lhs
= gimple_call_lhs (stmt
);
220 /* This use is out of pattern use, if LHS has other uses that are
221 pattern uses, we should mark the stmt itself, and not the pattern
223 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
224 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
226 if (is_gimple_debug (USE_STMT (use_p
)))
228 use_stmt
= USE_STMT (use_p
);
230 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
233 if (vinfo_for_stmt (use_stmt
)
234 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
244 /* This is the last stmt in a sequence that was detected as a
245 pattern that can potentially be vectorized. Don't mark the stmt
246 as relevant/live because it's not going to be vectorized.
247 Instead mark the pattern-stmt that replaces it. */
249 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
251 if (dump_enabled_p ())
252 dump_printf_loc (MSG_NOTE
, vect_location
,
253 "last stmt in pattern. don't mark"
254 " relevant/live.\n");
255 stmt_info
= vinfo_for_stmt (pattern_stmt
);
256 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
257 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
258 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
263 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
264 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
265 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
267 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
268 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
270 if (dump_enabled_p ())
271 dump_printf_loc (MSG_NOTE
, vect_location
,
272 "already marked relevant/live.\n");
276 worklist
->safe_push (stmt
);
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - control stmts in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
293 vect_stmt_relevant_p (gimple
*stmt
, loop_vec_info loop_vinfo
,
294 enum vect_relevant
*relevant
, bool *live_p
)
296 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
298 imm_use_iterator imm_iter
;
302 *relevant
= vect_unused_in_scope
;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt
)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
308 != loop_exit_ctrl_vec_info_type
)
309 *relevant
= vect_used_in_scope
;
311 /* changing memory. */
312 if (gimple_code (stmt
) != GIMPLE_PHI
)
313 if (gimple_vdef (stmt
)
314 && !gimple_clobber_p (stmt
))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE
, vect_location
,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant
= vect_used_in_scope
;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
325 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
327 basic_block bb
= gimple_bb (USE_STMT (use_p
));
328 if (!flow_bb_inside_loop_p (loop
, bb
))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE
, vect_location
,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p
)))
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
340 gcc_assert (bb
== single_exit (loop
)->dest
);
347 return (*live_p
|| *relevant
);
351 /* Function exist_non_indexing_operands_for_use_p
353 USE is one of the uses attached to STMT. Check if USE is
354 used in STMT for anything other than indexing an array. */
357 exist_non_indexing_operands_for_use_p (tree use
, gimple
*stmt
)
360 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
362 /* USE corresponds to some operand in STMT. If there is no data
363 reference in STMT, then any operand that corresponds to USE
364 is not indexing an array. */
365 if (!STMT_VINFO_DATA_REF (stmt_info
))
368 /* STMT has a data_ref. FORNOW this means that its of one of
372 (This should have been verified in analyze_data_refs).
374 'var' in the second case corresponds to a def, not a use,
375 so USE cannot correspond to any operands that are not used
378 Therefore, all we need to check is if STMT falls into the
379 first case, and whether var corresponds to USE. */
381 if (!gimple_assign_copy_p (stmt
))
383 if (is_gimple_call (stmt
)
384 && gimple_call_internal_p (stmt
))
385 switch (gimple_call_internal_fn (stmt
))
388 operand
= gimple_call_arg (stmt
, 3);
393 operand
= gimple_call_arg (stmt
, 2);
403 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
405 operand
= gimple_assign_rhs1 (stmt
);
406 if (TREE_CODE (operand
) != SSA_NAME
)
417 Function process_use.
420 - a USE in STMT in a loop represented by LOOP_VINFO
421 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
422 that defined USE. This is done by calling mark_relevant and passing it
423 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
424 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
428 Generally, LIVE_P and RELEVANT are used to define the liveness and
429 relevance info of the DEF_STMT of this USE:
430 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
431 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
433 - case 1: If USE is used only for address computations (e.g. array indexing),
434 which does not need to be directly vectorized, then the liveness/relevance
435 of the respective DEF_STMT is left unchanged.
436 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
437 skip DEF_STMT cause it had already been processed.
438 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
439 be modified accordingly.
441 Return true if everything is as expected. Return false otherwise. */
444 process_use (gimple
*stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
445 enum vect_relevant relevant
, vec
<gimple
*> *worklist
,
448 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
449 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
450 stmt_vec_info dstmt_vinfo
;
451 basic_block bb
, def_bb
;
453 enum vect_def_type dt
;
455 /* case 1: we are only interested in uses that need to be vectorized. Uses
456 that are used for address computation are not considered relevant. */
457 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
460 if (!vect_is_simple_use (use
, loop_vinfo
, &def_stmt
, &dt
))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
464 "not vectorized: unsupported use in stmt.\n");
468 if (!def_stmt
|| gimple_nop_p (def_stmt
))
471 def_bb
= gimple_bb (def_stmt
);
472 if (!flow_bb_inside_loop_p (loop
, def_bb
))
474 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
479 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
480 DEF_STMT must have already been processed, because this should be the
481 only way that STMT, which is a reduction-phi, was put in the worklist,
482 as there should be no other uses for DEF_STMT in the loop. So we just
483 check that everything is as expected, and we are done. */
484 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
485 bb
= gimple_bb (stmt
);
486 if (gimple_code (stmt
) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
488 && gimple_code (def_stmt
) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
490 && bb
->loop_father
== def_bb
->loop_father
)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE
, vect_location
,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
496 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
510 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE
, vect_location
,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
518 case vect_unused_in_scope
:
519 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
520 vect_used_in_scope
: vect_unused_in_scope
;
523 case vect_used_in_outer_by_reduction
:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
525 relevant
= vect_used_by_reduction
;
528 case vect_used_in_outer
:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
530 relevant
= vect_used_in_scope
;
533 case vect_used_in_scope
:
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
548 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE
, vect_location
,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
556 case vect_unused_in_scope
:
557 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
559 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
562 case vect_used_by_reduction
:
563 relevant
= vect_used_in_outer_by_reduction
;
566 case vect_used_in_scope
:
567 relevant
= vect_used_in_outer
;
575 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
576 is_pattern_stmt_p (stmt_vinfo
));
581 /* Function vect_mark_stmts_to_be_vectorized.
583 Not all stmts in the loop need to be vectorized. For example:
592 Stmt 1 and 3 do not need to be vectorized, because loop control and
593 addressing of vectorized data-refs are handled differently.
595 This pass detects such stmts. */
598 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
600 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
601 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
602 unsigned int nbbs
= loop
->num_nodes
;
603 gimple_stmt_iterator si
;
606 stmt_vec_info stmt_vinfo
;
610 enum vect_relevant relevant
, tmp_relevant
;
611 enum vect_def_type def_type
;
613 if (dump_enabled_p ())
614 dump_printf_loc (MSG_NOTE
, vect_location
,
615 "=== vect_mark_stmts_to_be_vectorized ===\n");
617 auto_vec
<gimple
*, 64> worklist
;
619 /* 1. Init worklist. */
620 for (i
= 0; i
< nbbs
; i
++)
623 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
626 if (dump_enabled_p ())
628 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
629 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
632 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
633 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
635 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
637 stmt
= gsi_stmt (si
);
638 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
641 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
644 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt
= worklist
.pop ();
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
659 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant and live/dead according to the
664 liveness and relevance properties of STMT. */
665 stmt_vinfo
= vinfo_for_stmt (stmt
);
666 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
667 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
669 /* Generally, the liveness and relevance properties of STMT are
670 propagated as is to the DEF_STMTs of its USEs:
671 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
672 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
674 One exception is when STMT has been identified as defining a reduction
675 variable; in this case we set the liveness/relevance as follows:
677 relevant = vect_used_by_reduction
678 This is because we distinguish between two kinds of relevant stmts -
679 those that are used by a reduction computation, and those that are
680 (also) used by a regular computation. This allows us later on to
681 identify stmts that are used solely by a reduction, and therefore the
682 order of the results that they produce does not have to be kept. */
684 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
685 tmp_relevant
= relevant
;
688 case vect_reduction_def
:
689 switch (tmp_relevant
)
691 case vect_unused_in_scope
:
692 relevant
= vect_used_by_reduction
;
695 case vect_used_by_reduction
:
696 if (gimple_code (stmt
) == GIMPLE_PHI
)
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
703 "unsupported use of reduction.\n");
710 case vect_nested_cycle
:
711 if (tmp_relevant
!= vect_unused_in_scope
712 && tmp_relevant
!= vect_used_in_outer_by_reduction
713 && tmp_relevant
!= vect_used_in_outer
)
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
717 "unsupported use of nested cycle.\n");
725 case vect_double_reduction_def
:
726 if (tmp_relevant
!= vect_unused_in_scope
727 && tmp_relevant
!= vect_used_by_reduction
)
729 if (dump_enabled_p ())
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
731 "unsupported use of double reduction.\n");
743 if (is_pattern_stmt_p (stmt_vinfo
))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (is_gimple_assign (stmt
))
750 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
751 tree op
= gimple_assign_rhs1 (stmt
);
754 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
756 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
757 live_p
, relevant
, &worklist
, false)
758 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
759 live_p
, relevant
, &worklist
, false))
763 for (; i
< gimple_num_ops (stmt
); i
++)
765 op
= gimple_op (stmt
, i
);
766 if (TREE_CODE (op
) == SSA_NAME
767 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
772 else if (is_gimple_call (stmt
))
774 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
776 tree arg
= gimple_call_arg (stmt
, i
);
777 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
784 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
786 tree op
= USE_FROM_PTR (use_p
);
787 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
792 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
795 tree decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
797 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
801 } /* while worklist */
807 /* Function vect_model_simple_cost.
809 Models cost for simple operations, i.e. those that only emit ncopies of a
810 single op. Right now, this does not account for multiple insns that could
811 be generated for the single vector op. We will handle that shortly. */
814 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
815 enum vect_def_type
*dt
,
816 stmt_vector_for_cost
*prologue_cost_vec
,
817 stmt_vector_for_cost
*body_cost_vec
)
820 int inside_cost
= 0, prologue_cost
= 0;
822 /* The SLP costs were already calculated during SLP tree build. */
823 if (PURE_SLP_STMT (stmt_info
))
826 /* FORNOW: Assuming maximum 2 args per stmts. */
827 for (i
= 0; i
< 2; i
++)
828 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
829 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
830 stmt_info
, 0, vect_prologue
);
832 /* Pass the inside-of-loop statements to the target-specific cost model. */
833 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
834 stmt_info
, 0, vect_body
);
836 if (dump_enabled_p ())
837 dump_printf_loc (MSG_NOTE
, vect_location
,
838 "vect_model_simple_cost: inside_cost = %d, "
839 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
843 /* Model cost for type demotion and promotion operations. PWR is normally
844 zero for single-step promotions and demotions. It will be one if
845 two-step promotion/demotion is required, and so on. Each additional
846 step doubles the number of instructions required. */
849 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
850 enum vect_def_type
*dt
, int pwr
)
853 int inside_cost
= 0, prologue_cost
= 0;
854 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
855 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
856 void *target_cost_data
;
858 /* The SLP costs were already calculated during SLP tree build. */
859 if (PURE_SLP_STMT (stmt_info
))
863 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
865 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
867 for (i
= 0; i
< pwr
+ 1; i
++)
869 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
871 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
872 vec_promote_demote
, stmt_info
, 0,
876 /* FORNOW: Assuming maximum 2 args per stmts. */
877 for (i
= 0; i
< 2; i
++)
878 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
879 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
880 stmt_info
, 0, vect_prologue
);
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_NOTE
, vect_location
,
884 "vect_model_promotion_demotion_cost: inside_cost = %d, "
885 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
888 /* Function vect_cost_group_size
890 For grouped load or store, return the group_size only if it is the first
891 load or store of a group, else return 1. This ensures that group size is
892 only returned once per group. */
895 vect_cost_group_size (stmt_vec_info stmt_info
)
897 gimple
*first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
899 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
900 return GROUP_SIZE (stmt_info
);
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
912 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
913 bool store_lanes_p
, enum vect_def_type dt
,
915 stmt_vector_for_cost
*prologue_cost_vec
,
916 stmt_vector_for_cost
*body_cost_vec
)
919 unsigned int inside_cost
= 0, prologue_cost
= 0;
920 struct data_reference
*first_dr
;
923 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
924 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
925 stmt_info
, 0, vect_prologue
);
927 /* Grouped access? */
928 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
932 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
937 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
938 group_size
= vect_cost_group_size (stmt_info
);
941 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
943 /* Not a grouped access. */
947 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
950 /* We assume that the cost of a single store-lanes instruction is
951 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
952 access is instead being provided by a permute-and-store operation,
953 include the cost of the permutes. */
954 if (!store_lanes_p
&& group_size
> 1
955 && !STMT_VINFO_STRIDED_P (stmt_info
))
957 /* Uses a high and low interleave or shuffle operations for each
959 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
960 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
961 stmt_info
, 0, vect_body
);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE
, vect_location
,
965 "vect_model_store_cost: strided group_size = %d .\n",
969 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
970 /* Costs of the stores. */
971 if (STMT_VINFO_STRIDED_P (stmt_info
)
972 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
974 /* N scalar stores plus extracting the elements. */
975 inside_cost
+= record_stmt_cost (body_cost_vec
,
976 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
977 scalar_store
, stmt_info
, 0, vect_body
);
980 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
982 if (STMT_VINFO_STRIDED_P (stmt_info
))
983 inside_cost
+= record_stmt_cost (body_cost_vec
,
984 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
985 vec_to_scalar
, stmt_info
, 0, vect_body
);
987 if (dump_enabled_p ())
988 dump_printf_loc (MSG_NOTE
, vect_location
,
989 "vect_model_store_cost: inside_cost = %d, "
990 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
994 /* Calculate cost of DR's memory access. */
996 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
997 unsigned int *inside_cost
,
998 stmt_vector_for_cost
*body_cost_vec
)
1000 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1001 gimple
*stmt
= DR_STMT (dr
);
1002 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1004 switch (alignment_support_scheme
)
1008 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1009 vector_store
, stmt_info
, 0,
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE
, vect_location
,
1014 "vect_model_store_cost: aligned.\n");
1018 case dr_unaligned_supported
:
1020 /* Here, we assign an additional cost for the unaligned store. */
1021 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1022 unaligned_store
, stmt_info
,
1023 DR_MISALIGNMENT (dr
), vect_body
);
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_NOTE
, vect_location
,
1026 "vect_model_store_cost: unaligned supported by "
1031 case dr_unaligned_unsupported
:
1033 *inside_cost
= VECT_MAX_COST
;
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1037 "vect_model_store_cost: unsupported access.\n");
1047 /* Function vect_model_load_cost
1049 Models cost for loads. In the case of grouped accesses, the last access
1050 has the overhead of the grouped access attributed to it. Since unaligned
1051 accesses are supported for loads, we also account for the costs of the
1052 access scheme chosen. */
1055 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1056 bool load_lanes_p
, slp_tree slp_node
,
1057 stmt_vector_for_cost
*prologue_cost_vec
,
1058 stmt_vector_for_cost
*body_cost_vec
)
1062 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1063 unsigned int inside_cost
= 0, prologue_cost
= 0;
1065 /* Grouped accesses? */
1066 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1069 group_size
= vect_cost_group_size (stmt_info
);
1070 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1072 /* Not a grouped access. */
1079 /* We assume that the cost of a single load-lanes instruction is
1080 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1081 access is instead being provided by a load-and-permute operation,
1082 include the cost of the permutes. */
1083 if (!load_lanes_p
&& group_size
> 1
1084 && !STMT_VINFO_STRIDED_P (stmt_info
))
1086 /* Uses an even and odd extract operations or shuffle operations
1087 for each needed permute. */
1088 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1089 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1090 stmt_info
, 0, vect_body
);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE
, vect_location
,
1094 "vect_model_load_cost: strided group_size = %d .\n",
1098 /* The loads themselves. */
1099 if (STMT_VINFO_STRIDED_P (stmt_info
)
1100 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1102 /* N scalar loads plus gathering them into a vector. */
1103 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1104 inside_cost
+= record_stmt_cost (body_cost_vec
,
1105 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1106 scalar_load
, stmt_info
, 0, vect_body
);
1109 vect_get_load_cost (first_dr
, ncopies
,
1110 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1111 || group_size
> 1 || slp_node
),
1112 &inside_cost
, &prologue_cost
,
1113 prologue_cost_vec
, body_cost_vec
, true);
1114 if (STMT_VINFO_STRIDED_P (stmt_info
))
1115 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1116 stmt_info
, 0, vect_body
);
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE
, vect_location
,
1120 "vect_model_load_cost: inside_cost = %d, "
1121 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1125 /* Calculate cost of DR's memory access. */
1127 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1128 bool add_realign_cost
, unsigned int *inside_cost
,
1129 unsigned int *prologue_cost
,
1130 stmt_vector_for_cost
*prologue_cost_vec
,
1131 stmt_vector_for_cost
*body_cost_vec
,
1132 bool record_prologue_costs
)
1134 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1135 gimple
*stmt
= DR_STMT (dr
);
1136 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1138 switch (alignment_support_scheme
)
1142 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1143 stmt_info
, 0, vect_body
);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE
, vect_location
,
1147 "vect_model_load_cost: aligned.\n");
1151 case dr_unaligned_supported
:
1153 /* Here, we assign an additional cost for the unaligned load. */
1154 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1155 unaligned_load
, stmt_info
,
1156 DR_MISALIGNMENT (dr
), vect_body
);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE
, vect_location
,
1160 "vect_model_load_cost: unaligned supported by "
1165 case dr_explicit_realign
:
1167 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1168 vector_load
, stmt_info
, 0, vect_body
);
1169 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1170 vec_perm
, stmt_info
, 0, vect_body
);
1172 /* FIXME: If the misalignment remains fixed across the iterations of
1173 the containing loop, the following cost should be added to the
1175 if (targetm
.vectorize
.builtin_mask_for_load
)
1176 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1177 stmt_info
, 0, vect_body
);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE
, vect_location
,
1181 "vect_model_load_cost: explicit realign\n");
1185 case dr_explicit_realign_optimized
:
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE
, vect_location
,
1189 "vect_model_load_cost: unaligned software "
1192 /* Unaligned software pipeline has a load of an address, an initial
1193 load, and possibly a mask operation to "prime" the loop. However,
1194 if this is an access in a group of loads, which provide grouped
1195 access, then the above cost should only be considered for one
1196 access in the group. Inside the loop, there is a load op
1197 and a realignment op. */
1199 if (add_realign_cost
&& record_prologue_costs
)
1201 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1202 vector_stmt
, stmt_info
,
1204 if (targetm
.vectorize
.builtin_mask_for_load
)
1205 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1206 vector_stmt
, stmt_info
,
1210 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1211 stmt_info
, 0, vect_body
);
1212 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1213 stmt_info
, 0, vect_body
);
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE
, vect_location
,
1217 "vect_model_load_cost: explicit realign optimized"
1223 case dr_unaligned_unsupported
:
1225 *inside_cost
= VECT_MAX_COST
;
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1229 "vect_model_load_cost: unsupported access.\n");
1238 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1239 the loop preheader for the vectorized stmt STMT. */
1242 vect_init_vector_1 (gimple
*stmt
, gimple
*new_stmt
, gimple_stmt_iterator
*gsi
)
1245 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1248 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1249 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1253 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1257 if (nested_in_vect_loop_p (loop
, stmt
))
1260 pe
= loop_preheader_edge (loop
);
1261 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1262 gcc_assert (!new_bb
);
1266 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1268 gimple_stmt_iterator gsi_bb_start
;
1270 gcc_assert (bb_vinfo
);
1271 bb
= BB_VINFO_BB (bb_vinfo
);
1272 gsi_bb_start
= gsi_after_labels (bb
);
1273 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1277 if (dump_enabled_p ())
1279 dump_printf_loc (MSG_NOTE
, vect_location
,
1280 "created new init_stmt: ");
1281 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1285 /* Function vect_init_vector.
1287 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1288 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1289 vector type a vector with all elements equal to VAL is created first.
1290 Place the initialization at BSI if it is not NULL. Otherwise, place the
1291 initialization at the loop preheader.
1292 Return the DEF of INIT_STMT.
1293 It will be used in the vectorization of STMT. */
1296 vect_init_vector (gimple
*stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1301 if (TREE_CODE (type
) == VECTOR_TYPE
1302 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1304 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1306 /* Scalar boolean value should be transformed into
1307 all zeros or all ones value before building a vector. */
1308 if (VECTOR_BOOLEAN_TYPE_P (type
))
1310 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1311 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1313 if (CONSTANT_CLASS_P (val
))
1314 val
= integer_zerop (val
) ? false_val
: true_val
;
1317 new_temp
= make_ssa_name (TREE_TYPE (type
));
1318 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1319 val
, true_val
, false_val
);
1320 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1324 else if (CONSTANT_CLASS_P (val
))
1325 val
= fold_convert (TREE_TYPE (type
), val
);
1328 new_temp
= make_ssa_name (TREE_TYPE (type
));
1329 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1330 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1334 val
= build_vector_from_val (type
, val
);
1337 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1338 init_stmt
= gimple_build_assign (new_temp
, val
);
1339 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1344 /* Function vect_get_vec_def_for_operand.
1346 OP is an operand in STMT. This function returns a (vector) def that will be
1347 used in the vectorized stmt for STMT.
1349 In the case that OP is an SSA_NAME which is defined in the loop, then
1350 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1352 In case OP is an invariant or constant, a new stmt that creates a vector def
1353 needs to be introduced. VECTYPE may be used to specify a required type for
1354 vector invariant. */
1357 vect_get_vec_def_for_operand (tree op
, gimple
*stmt
, tree vectype
)
1362 stmt_vec_info def_stmt_info
= NULL
;
1363 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1364 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1365 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1366 enum vect_def_type dt
;
1370 if (dump_enabled_p ())
1372 dump_printf_loc (MSG_NOTE
, vect_location
,
1373 "vect_get_vec_def_for_operand: ");
1374 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1375 dump_printf (MSG_NOTE
, "\n");
1378 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &def_stmt
, &dt
);
1379 gcc_assert (is_simple_use
);
1380 if (dump_enabled_p ())
1382 int loc_printed
= 0;
1386 dump_printf (MSG_NOTE
, " def_stmt = ");
1388 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1389 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1395 /* operand is a constant or a loop invariant. */
1396 case vect_constant_def
:
1397 case vect_external_def
:
1400 vector_type
= vectype
;
1401 else if (TREE_CODE (TREE_TYPE (op
)) == BOOLEAN_TYPE
1402 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1403 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1405 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1407 gcc_assert (vector_type
);
1408 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1411 /* operand is defined inside the loop. */
1412 case vect_internal_def
:
1414 /* Get the def from the vectorized stmt. */
1415 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1417 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1418 /* Get vectorized pattern statement. */
1420 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1421 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1422 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1423 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1424 gcc_assert (vec_stmt
);
1425 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1426 vec_oprnd
= PHI_RESULT (vec_stmt
);
1427 else if (is_gimple_call (vec_stmt
))
1428 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1430 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1434 /* operand is defined by a loop header phi - reduction */
1435 case vect_reduction_def
:
1436 case vect_double_reduction_def
:
1437 case vect_nested_cycle
:
1438 /* Code should use get_initial_def_for_reduction. */
1441 /* operand is defined by loop-header phi - induction. */
1442 case vect_induction_def
:
1444 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1446 /* Get the def from the vectorized stmt. */
1447 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1448 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1449 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1450 vec_oprnd
= PHI_RESULT (vec_stmt
);
1452 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1521 gimple
*vec_stmt_for_operand
;
1522 stmt_vec_info def_stmt_info
;
1524 /* Do nothing; can reuse same def. */
1525 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1528 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1529 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1530 gcc_assert (def_stmt_info
);
1531 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1532 gcc_assert (vec_stmt_for_operand
);
1533 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1534 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1536 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1546 vec
<tree
> *vec_oprnds0
,
1547 vec
<tree
> *vec_oprnds1
)
1549 tree vec_oprnd
= vec_oprnds0
->pop ();
1551 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1552 vec_oprnds0
->quick_push (vec_oprnd
);
1554 if (vec_oprnds1
&& vec_oprnds1
->length ())
1556 vec_oprnd
= vec_oprnds1
->pop ();
1557 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1558 vec_oprnds1
->quick_push (vec_oprnd
);
1563 /* Get vectorized definitions for OP0 and OP1.
1564 REDUC_INDEX is the index of reduction operand in case of reduction,
1565 and -1 otherwise. */
1568 vect_get_vec_defs (tree op0
, tree op1
, gimple
*stmt
,
1569 vec
<tree
> *vec_oprnds0
,
1570 vec
<tree
> *vec_oprnds1
,
1571 slp_tree slp_node
, int reduc_index
)
1575 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1576 auto_vec
<tree
> ops (nops
);
1577 auto_vec
<vec
<tree
> > vec_defs (nops
);
1579 ops
.quick_push (op0
);
1581 ops
.quick_push (op1
);
1583 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1585 *vec_oprnds0
= vec_defs
[0];
1587 *vec_oprnds1
= vec_defs
[1];
1593 vec_oprnds0
->create (1);
1594 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
);
1595 vec_oprnds0
->quick_push (vec_oprnd
);
1599 vec_oprnds1
->create (1);
1600 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
);
1601 vec_oprnds1
->quick_push (vec_oprnd
);
1607 /* Function vect_finish_stmt_generation.
1609 Insert a new stmt. */
1612 vect_finish_stmt_generation (gimple
*stmt
, gimple
*vec_stmt
,
1613 gimple_stmt_iterator
*gsi
)
1615 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1616 vec_info
*vinfo
= stmt_info
->vinfo
;
1618 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1620 if (!gsi_end_p (*gsi
)
1621 && gimple_has_mem_ops (vec_stmt
))
1623 gimple
*at_stmt
= gsi_stmt (*gsi
);
1624 tree vuse
= gimple_vuse (at_stmt
);
1625 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1627 tree vdef
= gimple_vdef (at_stmt
);
1628 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1629 /* If we have an SSA vuse and insert a store, update virtual
1630 SSA form to avoid triggering the renamer. Do so only
1631 if we can easily see all uses - which is what almost always
1632 happens with the way vectorized stmts are inserted. */
1633 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1634 && ((is_gimple_assign (vec_stmt
)
1635 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1636 || (is_gimple_call (vec_stmt
)
1637 && !(gimple_call_flags (vec_stmt
)
1638 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1640 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1641 gimple_set_vdef (vec_stmt
, new_vdef
);
1642 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1646 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1648 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, vinfo
));
1650 if (dump_enabled_p ())
1652 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1653 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1656 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1658 /* While EH edges will generally prevent vectorization, stmt might
1659 e.g. be in a must-not-throw region. Ensure newly created stmts
1660 that could throw are part of the same region. */
1661 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1662 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1663 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1666 /* We want to vectorize a call to combined function CFN with function
1667 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1668 as the types of all inputs. Check whether this is possible using
1669 an internal function, returning its code if so or IFN_LAST if not. */
1672 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1673 tree vectype_out
, tree vectype_in
)
1676 if (internal_fn_p (cfn
))
1677 ifn
= as_internal_fn (cfn
);
1679 ifn
= associated_internal_fn (fndecl
);
1680 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1682 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1683 if (info
.vectorizable
)
1685 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1686 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1687 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1688 OPTIMIZE_FOR_SPEED
))
1696 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
*,
1697 gimple_stmt_iterator
*);
1700 /* Function vectorizable_mask_load_store.
1702 Check if STMT performs a conditional load or store that can be vectorized.
1703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1704 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1705 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1708 vectorizable_mask_load_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
1709 gimple
**vec_stmt
, slp_tree slp_node
)
1711 tree vec_dest
= NULL
;
1712 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1713 stmt_vec_info prev_stmt_info
;
1714 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1715 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1716 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1717 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1718 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1719 tree rhs_vectype
= NULL_TREE
;
1724 tree dataref_ptr
= NULL_TREE
;
1726 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1730 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1731 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1732 int gather_scale
= 1;
1733 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1737 enum vect_def_type dt
;
1739 if (slp_node
!= NULL
)
1742 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1743 gcc_assert (ncopies
>= 1);
1745 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1746 mask
= gimple_call_arg (stmt
, 2);
1748 if (TREE_CODE (TREE_TYPE (mask
)) != BOOLEAN_TYPE
)
1751 /* FORNOW. This restriction should be relaxed. */
1752 if (nested_in_vect_loop
&& ncopies
> 1)
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1756 "multiple types in nested loop.");
1760 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1763 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
1767 if (!STMT_VINFO_DATA_REF (stmt_info
))
1770 elem_type
= TREE_TYPE (vectype
);
1772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1775 if (STMT_VINFO_STRIDED_P (stmt_info
))
1778 if (TREE_CODE (mask
) != SSA_NAME
)
1781 if (!vect_is_simple_use (mask
, loop_vinfo
, &def_stmt
, &dt
, &mask_vectype
))
1785 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
1787 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
1792 tree rhs
= gimple_call_arg (stmt
, 3);
1793 if (!vect_is_simple_use (rhs
, loop_vinfo
, &def_stmt
, &dt
, &rhs_vectype
))
1797 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1800 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
1801 &gather_off
, &gather_scale
);
1802 gcc_assert (gather_decl
);
1803 if (!vect_is_simple_use (gather_off
, loop_vinfo
, &def_stmt
, &gather_dt
,
1804 &gather_off_vectype
))
1806 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1808 "gather index use not simple.");
1812 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1814 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1815 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1819 "masked gather with integer mask not supported.");
1823 else if (tree_int_cst_compare (nested_in_vect_loop
1824 ? STMT_VINFO_DR_STEP (stmt_info
)
1825 : DR_STEP (dr
), size_zero_node
) <= 0)
1827 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1828 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
),
1829 TYPE_MODE (mask_vectype
),
1832 && !useless_type_conversion_p (vectype
, rhs_vectype
)))
1835 if (!vec_stmt
) /* transformation not required. */
1837 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1839 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1842 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1850 tree vec_oprnd0
= NULL_TREE
, op
;
1851 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1852 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1853 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1854 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1855 tree mask_perm_mask
= NULL_TREE
;
1856 edge pe
= loop_preheader_edge (loop
);
1859 enum { NARROW
, NONE
, WIDEN
} modifier
;
1860 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1862 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1863 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1864 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1865 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1866 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1867 scaletype
= TREE_VALUE (arglist
);
1868 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1869 && types_compatible_p (srctype
, masktype
));
1871 if (nunits
== gather_off_nunits
)
1873 else if (nunits
== gather_off_nunits
/ 2)
1875 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1878 for (i
= 0; i
< gather_off_nunits
; ++i
)
1879 sel
[i
] = i
| nunits
;
1881 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1883 else if (nunits
== gather_off_nunits
* 2)
1885 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1888 for (i
= 0; i
< nunits
; ++i
)
1889 sel
[i
] = i
< gather_off_nunits
1890 ? i
: i
+ nunits
- gather_off_nunits
;
1892 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1894 for (i
= 0; i
< nunits
; ++i
)
1895 sel
[i
] = i
| gather_off_nunits
;
1896 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1901 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1903 ptr
= fold_convert (ptrtype
, gather_base
);
1904 if (!is_gimple_min_invariant (ptr
))
1906 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1907 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1908 gcc_assert (!new_bb
);
1911 scale
= build_int_cst (scaletype
, gather_scale
);
1913 prev_stmt_info
= NULL
;
1914 for (j
= 0; j
< ncopies
; ++j
)
1916 if (modifier
== WIDEN
&& (j
& 1))
1917 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1918 perm_mask
, stmt
, gsi
);
1921 = vect_get_vec_def_for_operand (gather_off
, stmt
);
1924 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1926 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1928 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1929 == TYPE_VECTOR_SUBPARTS (idxtype
));
1930 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
1931 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1933 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1934 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1938 if (mask_perm_mask
&& (j
& 1))
1939 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1940 mask_perm_mask
, stmt
, gsi
);
1944 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
1947 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
1948 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1952 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1955 == TYPE_VECTOR_SUBPARTS (masktype
));
1956 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
1957 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1959 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1966 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1969 if (!useless_type_conversion_p (vectype
, rettype
))
1971 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1972 == TYPE_VECTOR_SUBPARTS (rettype
));
1973 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
1974 gimple_call_set_lhs (new_stmt
, op
);
1975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1976 var
= make_ssa_name (vec_dest
);
1977 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1978 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1982 var
= make_ssa_name (vec_dest
, new_stmt
);
1983 gimple_call_set_lhs (new_stmt
, var
);
1986 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1988 if (modifier
== NARROW
)
1995 var
= permute_vec_elements (prev_res
, var
,
1996 perm_mask
, stmt
, gsi
);
1997 new_stmt
= SSA_NAME_DEF_STMT (var
);
2000 if (prev_stmt_info
== NULL
)
2001 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2003 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2004 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2007 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2009 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2011 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2012 stmt_info
= vinfo_for_stmt (stmt
);
2014 tree lhs
= gimple_call_lhs (stmt
);
2015 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2016 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2017 set_vinfo_for_stmt (stmt
, NULL
);
2018 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2019 gsi_replace (gsi
, new_stmt
, true);
2024 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2025 prev_stmt_info
= NULL
;
2026 for (i
= 0; i
< ncopies
; i
++)
2028 unsigned align
, misalign
;
2032 tree rhs
= gimple_call_arg (stmt
, 3);
2033 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
);
2034 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2035 /* We should have catched mismatched types earlier. */
2036 gcc_assert (useless_type_conversion_p (vectype
,
2037 TREE_TYPE (vec_rhs
)));
2038 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2039 NULL_TREE
, &dummy
, gsi
,
2040 &ptr_incr
, false, &inv_p
);
2041 gcc_assert (!inv_p
);
2045 vect_is_simple_use (vec_rhs
, loop_vinfo
, &def_stmt
, &dt
);
2046 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2047 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2048 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2049 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2050 TYPE_SIZE_UNIT (vectype
));
2053 align
= TYPE_ALIGN_UNIT (vectype
);
2054 if (aligned_access_p (dr
))
2056 else if (DR_MISALIGNMENT (dr
) == -1)
2058 align
= TYPE_ALIGN_UNIT (elem_type
);
2062 misalign
= DR_MISALIGNMENT (dr
);
2063 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2065 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2066 misalign
? misalign
& -misalign
: align
);
2068 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2069 ptr
, vec_mask
, vec_rhs
);
2070 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2072 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2074 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2075 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2080 tree vec_mask
= NULL_TREE
;
2081 prev_stmt_info
= NULL
;
2082 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2083 for (i
= 0; i
< ncopies
; i
++)
2085 unsigned align
, misalign
;
2089 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
2090 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2091 NULL_TREE
, &dummy
, gsi
,
2092 &ptr_incr
, false, &inv_p
);
2093 gcc_assert (!inv_p
);
2097 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
2098 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2099 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2100 TYPE_SIZE_UNIT (vectype
));
2103 align
= TYPE_ALIGN_UNIT (vectype
);
2104 if (aligned_access_p (dr
))
2106 else if (DR_MISALIGNMENT (dr
) == -1)
2108 align
= TYPE_ALIGN_UNIT (elem_type
);
2112 misalign
= DR_MISALIGNMENT (dr
);
2113 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2115 tree ptr
= build_int_cst (TREE_TYPE (gimple_call_arg (stmt
, 1)),
2116 misalign
? misalign
& -misalign
: align
);
2118 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2120 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2121 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2123 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2125 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2126 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2132 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2134 if (STMT_VINFO_RELATED_STMT (stmt_info
))
2136 stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
2137 stmt_info
= vinfo_for_stmt (stmt
);
2139 tree lhs
= gimple_call_lhs (stmt
);
2140 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2141 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2142 set_vinfo_for_stmt (stmt
, NULL
);
2143 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2144 gsi_replace (gsi
, new_stmt
, true);
2150 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2151 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2152 in a single step. On success, store the binary pack code in
2156 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
2157 tree_code
*convert_code
)
2159 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
2160 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
2164 int multi_step_cvt
= 0;
2165 auto_vec
<tree
, 8> interm_types
;
2166 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
2167 &code
, &multi_step_cvt
,
2172 *convert_code
= code
;
2176 /* Function vectorizable_call.
2178 Check if GS performs a function call that can be vectorized.
2179 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2180 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2181 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2184 vectorizable_call (gimple
*gs
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
2191 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2192 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2193 tree vectype_out
, vectype_in
;
2196 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2197 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2198 vec_info
*vinfo
= stmt_info
->vinfo
;
2199 tree fndecl
, new_temp
, rhs_type
;
2201 enum vect_def_type dt
[3]
2202 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2203 gimple
*new_stmt
= NULL
;
2205 vec
<tree
> vargs
= vNULL
;
2206 enum { NARROW
, NONE
, WIDEN
} modifier
;
2210 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2213 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2217 /* Is GS a vectorizable call? */
2218 stmt
= dyn_cast
<gcall
*> (gs
);
2222 if (gimple_call_internal_p (stmt
)
2223 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2224 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2225 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2228 if (gimple_call_lhs (stmt
) == NULL_TREE
2229 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2232 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2234 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2236 /* Process function arguments. */
2237 rhs_type
= NULL_TREE
;
2238 vectype_in
= NULL_TREE
;
2239 nargs
= gimple_call_num_args (stmt
);
2241 /* Bail out if the function has more than three arguments, we do not have
2242 interesting builtin functions to vectorize with more than two arguments
2243 except for fma. No arguments is also not good. */
2244 if (nargs
== 0 || nargs
> 3)
2247 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2248 if (gimple_call_internal_p (stmt
)
2249 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2252 rhs_type
= unsigned_type_node
;
2255 for (i
= 0; i
< nargs
; i
++)
2259 op
= gimple_call_arg (stmt
, i
);
2261 /* We can only handle calls with arguments of the same type. */
2263 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2267 "argument types differ.\n");
2271 rhs_type
= TREE_TYPE (op
);
2273 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2277 "use not simple.\n");
2282 vectype_in
= opvectype
;
2284 && opvectype
!= vectype_in
)
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2288 "argument vector types differ.\n");
2292 /* If all arguments are external or constant defs use a vector type with
2293 the same size as the output vector type. */
2295 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2297 gcc_assert (vectype_in
);
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2303 "no vectype for scalar type ");
2304 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2305 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2312 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2313 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2314 if (nunits_in
== nunits_out
/ 2)
2316 else if (nunits_out
== nunits_in
)
2318 else if (nunits_out
== nunits_in
/ 2)
2323 /* We only handle functions that do not read or clobber memory. */
2324 if (gimple_vuse (stmt
))
2326 if (dump_enabled_p ())
2327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2328 "function reads from or writes to memory.\n");
2332 /* For now, we only vectorize functions if a target specific builtin
2333 is available. TODO -- in some cases, it might be profitable to
2334 insert the calls for pieces of the vector, in order to be able
2335 to vectorize other operations in the loop. */
2337 internal_fn ifn
= IFN_LAST
;
2338 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2339 tree callee
= gimple_call_fndecl (stmt
);
2341 /* First try using an internal function. */
2342 tree_code convert_code
= ERROR_MARK
;
2344 && (modifier
== NONE
2345 || (modifier
== NARROW
2346 && simple_integer_narrowing (vectype_out
, vectype_in
,
2348 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2351 /* If that fails, try asking for a target-specific built-in function. */
2352 if (ifn
== IFN_LAST
)
2354 if (cfn
!= CFN_LAST
)
2355 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2356 (cfn
, vectype_out
, vectype_in
);
2358 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2359 (callee
, vectype_out
, vectype_in
);
2362 if (ifn
== IFN_LAST
&& !fndecl
)
2364 if (cfn
== CFN_GOMP_SIMD_LANE
2367 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2368 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2369 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2370 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2372 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2373 { 0, 1, 2, ... vf - 1 } vector. */
2374 gcc_assert (nargs
== 0);
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2380 "function is not vectorizable.\n");
2385 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2387 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2388 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2390 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2392 /* Sanity check: make sure that at least one copy of the vectorized stmt
2393 needs to be generated. */
2394 gcc_assert (ncopies
>= 1);
2396 if (!vec_stmt
) /* transformation not required. */
2398 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2402 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2403 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2404 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2405 vec_promote_demote
, stmt_info
, 0, vect_body
);
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2416 scalar_dest
= gimple_call_lhs (stmt
);
2417 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2419 prev_stmt_info
= NULL
;
2420 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2422 tree prev_res
= NULL_TREE
;
2423 for (j
= 0; j
< ncopies
; ++j
)
2425 /* Build argument list for the vectorized call. */
2427 vargs
.create (nargs
);
2433 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2434 vec
<tree
> vec_oprnds0
;
2436 for (i
= 0; i
< nargs
; i
++)
2437 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2438 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2439 vec_oprnds0
= vec_defs
[0];
2441 /* Arguments are ready. Create the new vector stmt. */
2442 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2445 for (k
= 0; k
< nargs
; k
++)
2447 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2448 vargs
[k
] = vec_oprndsk
[i
];
2450 if (modifier
== NARROW
)
2452 tree half_res
= make_ssa_name (vectype_in
);
2453 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2454 gimple_call_set_lhs (new_stmt
, half_res
);
2455 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2458 prev_res
= half_res
;
2461 new_temp
= make_ssa_name (vec_dest
);
2462 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2463 prev_res
, half_res
);
2467 if (ifn
!= IFN_LAST
)
2468 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2470 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2471 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2472 gimple_call_set_lhs (new_stmt
, new_temp
);
2474 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2475 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2478 for (i
= 0; i
< nargs
; i
++)
2480 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2481 vec_oprndsi
.release ();
2486 for (i
= 0; i
< nargs
; i
++)
2488 op
= gimple_call_arg (stmt
, i
);
2491 = vect_get_vec_def_for_operand (op
, stmt
);
2494 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2496 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2499 vargs
.quick_push (vec_oprnd0
);
2502 if (gimple_call_internal_p (stmt
)
2503 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2505 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2507 for (k
= 0; k
< nunits_out
; ++k
)
2508 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2509 tree cst
= build_vector (vectype_out
, v
);
2511 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2512 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2513 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2514 new_temp
= make_ssa_name (vec_dest
);
2515 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2517 else if (modifier
== NARROW
)
2519 tree half_res
= make_ssa_name (vectype_in
);
2520 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2521 gimple_call_set_lhs (new_stmt
, half_res
);
2522 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2525 prev_res
= half_res
;
2528 new_temp
= make_ssa_name (vec_dest
);
2529 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2530 prev_res
, half_res
);
2534 if (ifn
!= IFN_LAST
)
2535 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2537 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2538 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2539 gimple_call_set_lhs (new_stmt
, new_temp
);
2541 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2543 if (j
== (modifier
== NARROW
? 1 : 0))
2544 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2546 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2548 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2551 else if (modifier
== NARROW
)
2553 for (j
= 0; j
< ncopies
; ++j
)
2555 /* Build argument list for the vectorized call. */
2557 vargs
.create (nargs
* 2);
2563 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2564 vec
<tree
> vec_oprnds0
;
2566 for (i
= 0; i
< nargs
; i
++)
2567 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2568 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2569 vec_oprnds0
= vec_defs
[0];
2571 /* Arguments are ready. Create the new vector stmt. */
2572 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2576 for (k
= 0; k
< nargs
; k
++)
2578 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2579 vargs
.quick_push (vec_oprndsk
[i
]);
2580 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2582 if (ifn
!= IFN_LAST
)
2583 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2585 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2586 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2587 gimple_call_set_lhs (new_stmt
, new_temp
);
2588 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2589 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2592 for (i
= 0; i
< nargs
; i
++)
2594 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2595 vec_oprndsi
.release ();
2600 for (i
= 0; i
< nargs
; i
++)
2602 op
= gimple_call_arg (stmt
, i
);
2606 = vect_get_vec_def_for_operand (op
, stmt
);
2608 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2612 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2614 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2616 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2619 vargs
.quick_push (vec_oprnd0
);
2620 vargs
.quick_push (vec_oprnd1
);
2623 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2624 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2625 gimple_call_set_lhs (new_stmt
, new_temp
);
2626 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2629 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2631 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2633 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2636 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2639 /* No current target implements this case. */
2644 /* The call in STMT might prevent it from being removed in dce.
2645 We however cannot remove it here, due to the way the ssa name
2646 it defines is mapped to the new definition. So just replace
2647 rhs of the statement with something harmless. */
2652 type
= TREE_TYPE (scalar_dest
);
2653 if (is_pattern_stmt_p (stmt_info
))
2654 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2656 lhs
= gimple_call_lhs (stmt
);
2658 if (gimple_call_internal_p (stmt
)
2659 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2661 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2662 with vf - 1 rather than 0, that is the last iteration of the
2664 imm_use_iterator iter
;
2665 use_operand_p use_p
;
2667 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2669 basic_block use_bb
= gimple_bb (use_stmt
);
2671 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), use_bb
))
2673 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2674 SET_USE (use_p
, build_int_cst (TREE_TYPE (lhs
),
2675 ncopies
* nunits_out
- 1));
2676 update_stmt (use_stmt
);
2681 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2682 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2683 set_vinfo_for_stmt (stmt
, NULL
);
2684 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2685 gsi_replace (gsi
, new_stmt
, false);
2691 struct simd_call_arg_info
2695 enum vect_def_type dt
;
2696 HOST_WIDE_INT linear_step
;
2698 bool simd_lane_linear
;
2701 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2702 is linear within simd lane (but not within whole loop), note it in
2706 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2707 struct simd_call_arg_info
*arginfo
)
2709 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2711 if (!is_gimple_assign (def_stmt
)
2712 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2713 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2716 tree base
= gimple_assign_rhs1 (def_stmt
);
2717 HOST_WIDE_INT linear_step
= 0;
2718 tree v
= gimple_assign_rhs2 (def_stmt
);
2719 while (TREE_CODE (v
) == SSA_NAME
)
2722 def_stmt
= SSA_NAME_DEF_STMT (v
);
2723 if (is_gimple_assign (def_stmt
))
2724 switch (gimple_assign_rhs_code (def_stmt
))
2727 t
= gimple_assign_rhs2 (def_stmt
);
2728 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2730 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2731 v
= gimple_assign_rhs1 (def_stmt
);
2734 t
= gimple_assign_rhs2 (def_stmt
);
2735 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2737 linear_step
= tree_to_shwi (t
);
2738 v
= gimple_assign_rhs1 (def_stmt
);
2741 t
= gimple_assign_rhs1 (def_stmt
);
2742 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2743 || (TYPE_PRECISION (TREE_TYPE (v
))
2744 < TYPE_PRECISION (TREE_TYPE (t
))))
2753 else if (is_gimple_call (def_stmt
)
2754 && gimple_call_internal_p (def_stmt
)
2755 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2757 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2758 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2763 arginfo
->linear_step
= linear_step
;
2765 arginfo
->simd_lane_linear
= true;
2771 /* Function vectorizable_simd_clone_call.
2773 Check if STMT performs a function call that can be vectorized
2774 by calling a simd clone of the function.
2775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2780 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2781 gimple
**vec_stmt
, slp_tree slp_node
)
2786 tree vec_oprnd0
= NULL_TREE
;
2787 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2789 unsigned int nunits
;
2790 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2791 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2792 vec_info
*vinfo
= stmt_info
->vinfo
;
2793 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2794 tree fndecl
, new_temp
;
2796 gimple
*new_stmt
= NULL
;
2798 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2799 vec
<tree
> vargs
= vNULL
;
2801 tree lhs
, rtype
, ratype
;
2802 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2804 /* Is STMT a vectorizable call? */
2805 if (!is_gimple_call (stmt
))
2808 fndecl
= gimple_call_fndecl (stmt
);
2809 if (fndecl
== NULL_TREE
)
2812 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2813 if (node
== NULL
|| node
->simd_clones
== NULL
)
2816 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2819 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2823 if (gimple_call_lhs (stmt
)
2824 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2827 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2829 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2831 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2835 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2838 /* Process function arguments. */
2839 nargs
= gimple_call_num_args (stmt
);
2841 /* Bail out if the function has zero arguments. */
2845 arginfo
.create (nargs
);
2847 for (i
= 0; i
< nargs
; i
++)
2849 simd_call_arg_info thisarginfo
;
2852 thisarginfo
.linear_step
= 0;
2853 thisarginfo
.align
= 0;
2854 thisarginfo
.op
= NULL_TREE
;
2855 thisarginfo
.simd_lane_linear
= false;
2857 op
= gimple_call_arg (stmt
, i
);
2858 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
2859 &thisarginfo
.vectype
)
2860 || thisarginfo
.dt
== vect_uninitialized_def
)
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2864 "use not simple.\n");
2869 if (thisarginfo
.dt
== vect_constant_def
2870 || thisarginfo
.dt
== vect_external_def
)
2871 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2873 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2875 /* For linear arguments, the analyze phase should have saved
2876 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2877 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2878 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2880 gcc_assert (vec_stmt
);
2881 thisarginfo
.linear_step
2882 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2884 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2885 thisarginfo
.simd_lane_linear
2886 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2887 == boolean_true_node
);
2888 /* If loop has been peeled for alignment, we need to adjust it. */
2889 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2890 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2891 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2893 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2894 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2895 tree opt
= TREE_TYPE (thisarginfo
.op
);
2896 bias
= fold_convert (TREE_TYPE (step
), bias
);
2897 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2899 = fold_build2 (POINTER_TYPE_P (opt
)
2900 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2901 thisarginfo
.op
, bias
);
2905 && thisarginfo
.dt
!= vect_constant_def
2906 && thisarginfo
.dt
!= vect_external_def
2908 && TREE_CODE (op
) == SSA_NAME
2909 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2911 && tree_fits_shwi_p (iv
.step
))
2913 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2914 thisarginfo
.op
= iv
.base
;
2916 else if ((thisarginfo
.dt
== vect_constant_def
2917 || thisarginfo
.dt
== vect_external_def
)
2918 && POINTER_TYPE_P (TREE_TYPE (op
)))
2919 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2920 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2922 if (POINTER_TYPE_P (TREE_TYPE (op
))
2923 && !thisarginfo
.linear_step
2925 && thisarginfo
.dt
!= vect_constant_def
2926 && thisarginfo
.dt
!= vect_external_def
2929 && TREE_CODE (op
) == SSA_NAME
)
2930 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2932 arginfo
.quick_push (thisarginfo
);
2935 unsigned int badness
= 0;
2936 struct cgraph_node
*bestn
= NULL
;
2937 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2938 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2940 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2941 n
= n
->simdclone
->next_clone
)
2943 unsigned int this_badness
= 0;
2944 if (n
->simdclone
->simdlen
2945 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2946 || n
->simdclone
->nargs
!= nargs
)
2948 if (n
->simdclone
->simdlen
2949 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2950 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2951 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2952 if (n
->simdclone
->inbranch
)
2953 this_badness
+= 2048;
2954 int target_badness
= targetm
.simd_clone
.usable (n
);
2955 if (target_badness
< 0)
2957 this_badness
+= target_badness
* 512;
2958 /* FORNOW: Have to add code to add the mask argument. */
2959 if (n
->simdclone
->inbranch
)
2961 for (i
= 0; i
< nargs
; i
++)
2963 switch (n
->simdclone
->args
[i
].arg_type
)
2965 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2966 if (!useless_type_conversion_p
2967 (n
->simdclone
->args
[i
].orig_type
,
2968 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2970 else if (arginfo
[i
].dt
== vect_constant_def
2971 || arginfo
[i
].dt
== vect_external_def
2972 || arginfo
[i
].linear_step
)
2975 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2976 if (arginfo
[i
].dt
!= vect_constant_def
2977 && arginfo
[i
].dt
!= vect_external_def
)
2980 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
2982 if (arginfo
[i
].dt
== vect_constant_def
2983 || arginfo
[i
].dt
== vect_external_def
2984 || (arginfo
[i
].linear_step
2985 != n
->simdclone
->args
[i
].linear_step
))
2988 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2989 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
2990 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
2991 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
2992 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
2993 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
2997 case SIMD_CLONE_ARG_TYPE_MASK
:
3000 if (i
== (size_t) -1)
3002 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3007 if (arginfo
[i
].align
)
3008 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3009 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3011 if (i
== (size_t) -1)
3013 if (bestn
== NULL
|| this_badness
< badness
)
3016 badness
= this_badness
;
3026 for (i
= 0; i
< nargs
; i
++)
3027 if ((arginfo
[i
].dt
== vect_constant_def
3028 || arginfo
[i
].dt
== vect_external_def
)
3029 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3032 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3034 if (arginfo
[i
].vectype
== NULL
3035 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3036 > bestn
->simdclone
->simdlen
))
3043 fndecl
= bestn
->decl
;
3044 nunits
= bestn
->simdclone
->simdlen
;
3045 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3047 /* If the function isn't const, only allow it in simd loops where user
3048 has asserted that at least nunits consecutive iterations can be
3049 performed using SIMD instructions. */
3050 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3051 && gimple_vuse (stmt
))
3057 /* Sanity check: make sure that at least one copy of the vectorized stmt
3058 needs to be generated. */
3059 gcc_assert (ncopies
>= 1);
3061 if (!vec_stmt
) /* transformation not required. */
3063 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3064 for (i
= 0; i
< nargs
; i
++)
3065 if (bestn
->simdclone
->args
[i
].arg_type
3066 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3068 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3070 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3071 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3072 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3073 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3074 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3075 tree sll
= arginfo
[i
].simd_lane_linear
3076 ? boolean_true_node
: boolean_false_node
;
3077 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3079 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3080 if (dump_enabled_p ())
3081 dump_printf_loc (MSG_NOTE
, vect_location
,
3082 "=== vectorizable_simd_clone_call ===\n");
3083 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3094 scalar_dest
= gimple_call_lhs (stmt
);
3095 vec_dest
= NULL_TREE
;
3100 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3101 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3102 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3105 rtype
= TREE_TYPE (ratype
);
3109 prev_stmt_info
= NULL
;
3110 for (j
= 0; j
< ncopies
; ++j
)
3112 /* Build argument list for the vectorized call. */
3114 vargs
.create (nargs
);
3118 for (i
= 0; i
< nargs
; i
++)
3120 unsigned int k
, l
, m
, o
;
3122 op
= gimple_call_arg (stmt
, i
);
3123 switch (bestn
->simdclone
->args
[i
].arg_type
)
3125 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3126 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3127 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3128 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3130 if (TYPE_VECTOR_SUBPARTS (atype
)
3131 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3133 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3134 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3135 / TYPE_VECTOR_SUBPARTS (atype
));
3136 gcc_assert ((k
& (k
- 1)) == 0);
3139 = vect_get_vec_def_for_operand (op
, stmt
);
3142 vec_oprnd0
= arginfo
[i
].op
;
3143 if ((m
& (k
- 1)) == 0)
3145 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3148 arginfo
[i
].op
= vec_oprnd0
;
3150 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3152 bitsize_int ((m
& (k
- 1)) * prec
));
3154 = gimple_build_assign (make_ssa_name (atype
),
3156 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3157 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3161 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3162 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3163 gcc_assert ((k
& (k
- 1)) == 0);
3164 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3166 vec_alloc (ctor_elts
, k
);
3169 for (l
= 0; l
< k
; l
++)
3171 if (m
== 0 && l
== 0)
3173 = vect_get_vec_def_for_operand (op
, stmt
);
3176 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3178 arginfo
[i
].op
= vec_oprnd0
;
3181 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3185 vargs
.safe_push (vec_oprnd0
);
3188 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3190 = gimple_build_assign (make_ssa_name (atype
),
3192 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3193 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3198 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3199 vargs
.safe_push (op
);
3201 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3206 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3211 edge pe
= loop_preheader_edge (loop
);
3212 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3213 gcc_assert (!new_bb
);
3215 if (arginfo
[i
].simd_lane_linear
)
3217 vargs
.safe_push (arginfo
[i
].op
);
3220 tree phi_res
= copy_ssa_name (op
);
3221 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3222 set_vinfo_for_stmt (new_phi
,
3223 new_stmt_vec_info (new_phi
, loop_vinfo
));
3224 add_phi_arg (new_phi
, arginfo
[i
].op
,
3225 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3227 = POINTER_TYPE_P (TREE_TYPE (op
))
3228 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3229 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3230 ? sizetype
: TREE_TYPE (op
);
3232 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3234 tree tcst
= wide_int_to_tree (type
, cst
);
3235 tree phi_arg
= copy_ssa_name (op
);
3237 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3238 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3239 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3240 set_vinfo_for_stmt (new_stmt
,
3241 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3242 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3244 arginfo
[i
].op
= phi_res
;
3245 vargs
.safe_push (phi_res
);
3250 = POINTER_TYPE_P (TREE_TYPE (op
))
3251 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3252 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3253 ? sizetype
: TREE_TYPE (op
);
3255 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3257 tree tcst
= wide_int_to_tree (type
, cst
);
3258 new_temp
= make_ssa_name (TREE_TYPE (op
));
3259 new_stmt
= gimple_build_assign (new_temp
, code
,
3260 arginfo
[i
].op
, tcst
);
3261 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3262 vargs
.safe_push (new_temp
);
3265 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3266 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3267 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3268 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3274 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3277 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3279 new_temp
= create_tmp_var (ratype
);
3280 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3281 == TYPE_VECTOR_SUBPARTS (rtype
))
3282 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3284 new_temp
= make_ssa_name (rtype
, new_stmt
);
3285 gimple_call_set_lhs (new_stmt
, new_temp
);
3287 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3291 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3294 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3295 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3296 gcc_assert ((k
& (k
- 1)) == 0);
3297 for (l
= 0; l
< k
; l
++)
3302 t
= build_fold_addr_expr (new_temp
);
3303 t
= build2 (MEM_REF
, vectype
, t
,
3304 build_int_cst (TREE_TYPE (t
),
3305 l
* prec
/ BITS_PER_UNIT
));
3308 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3309 size_int (prec
), bitsize_int (l
* prec
));
3311 = gimple_build_assign (make_ssa_name (vectype
), t
);
3312 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3313 if (j
== 0 && l
== 0)
3314 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3316 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3318 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3323 tree clobber
= build_constructor (ratype
, NULL
);
3324 TREE_THIS_VOLATILE (clobber
) = 1;
3325 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3326 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3330 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3332 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3333 / TYPE_VECTOR_SUBPARTS (rtype
));
3334 gcc_assert ((k
& (k
- 1)) == 0);
3335 if ((j
& (k
- 1)) == 0)
3336 vec_alloc (ret_ctor_elts
, k
);
3339 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3340 for (m
= 0; m
< o
; m
++)
3342 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3343 size_int (m
), NULL_TREE
, NULL_TREE
);
3345 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3346 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3347 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3348 gimple_assign_lhs (new_stmt
));
3350 tree clobber
= build_constructor (ratype
, NULL
);
3351 TREE_THIS_VOLATILE (clobber
) = 1;
3352 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3353 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3356 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3357 if ((j
& (k
- 1)) != k
- 1)
3359 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3361 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3362 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3364 if ((unsigned) j
== k
- 1)
3365 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3367 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3369 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3374 tree t
= build_fold_addr_expr (new_temp
);
3375 t
= build2 (MEM_REF
, vectype
, t
,
3376 build_int_cst (TREE_TYPE (t
), 0));
3378 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3379 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3380 tree clobber
= build_constructor (ratype
, NULL
);
3381 TREE_THIS_VOLATILE (clobber
) = 1;
3382 vect_finish_stmt_generation (stmt
,
3383 gimple_build_assign (new_temp
,
3389 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3391 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3393 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3398 /* The call in STMT might prevent it from being removed in dce.
3399 We however cannot remove it here, due to the way the ssa name
3400 it defines is mapped to the new definition. So just replace
3401 rhs of the statement with something harmless. */
3408 type
= TREE_TYPE (scalar_dest
);
3409 if (is_pattern_stmt_p (stmt_info
))
3410 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3412 lhs
= gimple_call_lhs (stmt
);
3413 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3416 new_stmt
= gimple_build_nop ();
3417 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3418 set_vinfo_for_stmt (stmt
, NULL
);
3419 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3420 gsi_replace (gsi
, new_stmt
, true);
3421 unlink_stmt_vdef (stmt
);
/* NOTE(review): the text of this function is damaged by extraction --
   statements are split across lines, the source file's own line numbers
   (34xx) are fused into the text, and several lines (the return type,
   the DECL and STMT parameters, braces and else-branches) are missing.
   Comments below describe intent only; the text is not compilable as-is.  */
3427 /* Function vect_gen_widened_results_half
3429 Create a vector stmt whose code, type, number of arguments, and result
3430 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3431 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3432 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3433 needs to be created (DECL is a function-decl of a target-builtin).
3434 STMT is the original scalar stmt that we are vectorizing. */
3437 vect_gen_widened_results_half (enum tree_code code
,
3439 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3440 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3446 /* Generate half of the widened result: */
3447 if (code
== CALL_EXPR
)
3449 /* Target specific support */
3450 if (op_type
== binary_op
)
3451 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
/* NOTE(review): the else-branch marker between original lines 3451 and
   3453 was dropped; 3453 is presumably the unary-call alternative.  */
3453 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3454 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3455 gimple_call_set_lhs (new_stmt
, new_temp
);
3459 /* Generic support */
3460 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3461 if (op_type
!= binary_op
)
/* NOTE(review): the statement guarded by the test above (presumably
   clearing the unused second operand) was dropped by the extraction.  */
3463 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3464 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3465 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Emit the generated statement into the vectorized code at GSI.  */
3467 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): text damaged by extraction (split statements, fused line
   numbers, dropped lines).  Comments describe intent only.  */
3473 /* Get vectorized definitions for loop-based vectorization. For the first
3474 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3475 scalar operand), and for the rest we get a copy with
3476 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3477 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3478 The vectors are collected into VEC_OPRNDS. */
3481 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3482 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3486 /* Get first vector operand. */
3487 /* All the vector operands except the very first one (that is scalar oprnd)
3489 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3490 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
/* NOTE(review): an else-branch marker was dropped here; 3492 is the copy
   path taken when *OPRND is already a vector definition.  */
3492 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3494 vec_oprnds
->quick_push (vec_oprnd
);
3496 /* Get second vector operand. */
3497 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3498 vec_oprnds
->quick_push (vec_oprnd
);
3502 /* For conversion in multiple steps, continue to get operands
/* NOTE(review): the recursion guard (presumably testing MULTI_STEP_CVT
   before the recursive call) was dropped by the extraction.  */
3505 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
/* NOTE(review): text damaged by extraction (split statements, fused line
   numbers, dropped lines such as braces and else-branches).  Comments
   describe intent only; the text is not compilable as-is.  */
3509 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3510 For multi-step conversions store the resulting vectors and call the function
3514 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3515 int multi_step_cvt
, gimple
*stmt
,
3517 gimple_stmt_iterator
*gsi
,
3518 slp_tree slp_node
, enum tree_code code
,
3519 stmt_vec_info
*prev_stmt_info
)
3522 tree vop0
, vop1
, new_tmp
, vec_dest
;
3524 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Destination for this demotion level comes off the VEC_DSTS stack.  */
3526 vec_dest
= vec_dsts
.pop ();
/* Pair up operands: each demotion stmt packs two source vectors into one
   narrower result.  */
3528 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3530 /* Create demotion operation. */
3531 vop0
= (*vec_oprnds
)[i
];
3532 vop1
= (*vec_oprnds
)[i
+ 1];
3533 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3534 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3535 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3536 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* NOTE(review): the multi_step_cvt test selecting between the two
   branches below was dropped by the extraction.  */
3539 /* Store the resulting vector for next recursive call. */
3540 (*vec_oprnds
)[i
/2] = new_tmp
;
3543 /* This is the last step of the conversion sequence. Store the
3544 vectors in SLP_NODE or in vector info of the scalar statement
3545 (or in STMT_VINFO_RELATED_STMT chain). */
3547 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3550 if (!*prev_stmt_info
)
3551 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3553 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3555 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3560 /* For multi-step demotion operations we first generate demotion operations
3561 from the source type to the intermediate types, and then combine the
3562 results (stored in VEC_OPRNDS) in demotion operation to the destination
3566 /* At each level of recursion we have half of the operands we had at the
3568 vec_oprnds
->truncate ((i
+1)/2);
3569 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3570 stmt
, vec_dsts
, gsi
, slp_node
,
3571 VEC_PACK_TRUNC_EXPR
,
/* Restore VEC_DEST so the caller's stack is unchanged on return.  */
3575 vec_dsts
.quick_push (vec_dest
);
/* NOTE(review): text damaged by extraction (split statements, fused line
   numbers, dropped lines).  Comments describe intent only.  */
3579 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3580 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3581 the resulting vectors and call the function recursively. */
3584 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3585 vec
<tree
> *vec_oprnds1
,
3586 gimple
*stmt
, tree vec_dest
,
3587 gimple_stmt_iterator
*gsi
,
3588 enum tree_code code1
,
3589 enum tree_code code2
, tree decl1
,
3590 tree decl2
, int op_type
)
3593 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3594 gimple
*new_stmt1
, *new_stmt2
;
3595 vec
<tree
> vec_tmp
= vNULL
;
/* Each source vector produces two widened result halves, hence * 2.  */
3597 vec_tmp
.create (vec_oprnds0
->length () * 2);
3598 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3600 if (op_type
== binary_op
)
3601 vop1
= (*vec_oprnds1
)[i
];
/* NOTE(review): the else-branch (presumably clearing VOP1 for unary
   ops) was dropped by the extraction.  */
3605 /* Generate the two halves of promotion operation. */
3606 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3607 op_type
, vec_dest
, gsi
, stmt
);
3608 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3609 op_type
, vec_dest
, gsi
, stmt
);
3610 if (is_gimple_call (new_stmt1
))
3612 new_tmp1
= gimple_call_lhs (new_stmt1
);
3613 new_tmp2
= gimple_call_lhs (new_stmt2
);
/* NOTE(review): else-branch marker dropped; 3617-3618 handle the
   assignment (non-call) form.  */
3617 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3618 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3621 /* Store the results for the next step. */
3622 vec_tmp
.quick_push (new_tmp1
);
3623 vec_tmp
.quick_push (new_tmp2
);
/* Replace the caller's operand vector with the widened results.  */
3626 vec_oprnds0
->release ();
3627 *vec_oprnds0
= vec_tmp
;
/* NOTE(review): this very large function's text is damaged by extraction:
   statements are split across lines, the source file's own line numbers
   (36xx-42xx) are fused into the text, and many lines (return type,
   braces, else-branches, case labels, several conditions) are missing.
   The comments added below describe the apparent structure only -- do not
   treat the text as compilable or complete.  */
3631 /* Check if STMT performs a conversion operation, that can be vectorized.
3632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3637 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3638 gimple
**vec_stmt
, slp_tree slp_node
)
3642 tree op0
, op1
= NULL_TREE
;
3643 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3644 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3645 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3646 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3647 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3648 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3651 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3652 gimple
*new_stmt
= NULL
;
3653 stmt_vec_info prev_stmt_info
;
3656 tree vectype_out
, vectype_in
;
3658 tree lhs_type
, rhs_type
;
3659 enum { NARROW
, NONE
, WIDEN
} modifier
;
3660 vec
<tree
> vec_oprnds0
= vNULL
;
3661 vec
<tree
> vec_oprnds1
= vNULL
;
3663 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3664 vec_info
*vinfo
= stmt_info
->vinfo
;
3665 int multi_step_cvt
= 0;
3666 vec
<tree
> vec_dsts
= vNULL
;
3667 vec
<tree
> interm_types
= vNULL
;
3668 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3670 machine_mode rhs_mode
;
3671 unsigned short fltsz
;
3673 /* Is STMT a vectorizable conversion? */
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3678 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3682 if (!is_gimple_assign (stmt
))
3685 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
/* Only a fixed set of conversion-like rhs codes are handled.  */
3688 code
= gimple_assign_rhs_code (stmt
);
3689 if (!CONVERT_EXPR_CODE_P (code
)
3690 && code
!= FIX_TRUNC_EXPR
3691 && code
!= FLOAT_EXPR
3692 && code
!= WIDEN_MULT_EXPR
3693 && code
!= WIDEN_LSHIFT_EXPR
)
3696 op_type
= TREE_CODE_LENGTH (code
);
3698 /* Check types of lhs and rhs. */
3699 scalar_dest
= gimple_assign_lhs (stmt
);
3700 lhs_type
= TREE_TYPE (scalar_dest
);
3701 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3703 op0
= gimple_assign_rhs1 (stmt
);
3704 rhs_type
= TREE_TYPE (op0
);
3706 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3707 && !((INTEGRAL_TYPE_P (lhs_type
)
3708 && INTEGRAL_TYPE_P (rhs_type
))
3709 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3710 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Reject bit-precision (sub-mode) integer conversions.  */
3713 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3714 && ((INTEGRAL_TYPE_P (lhs_type
)
3715 && (TYPE_PRECISION (lhs_type
)
3716 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3717 || (INTEGRAL_TYPE_P (rhs_type
)
3718 && (TYPE_PRECISION (rhs_type
)
3719 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3723 "type conversion to/from bit-precision unsupported."
3728 /* Check the operands of the operation. */
3729 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3731 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3733 "use not simple.\n");
3736 if (op_type
== binary_op
)
3740 op1
= gimple_assign_rhs2 (stmt
);
3741 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3742 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3744 if (CONSTANT_CLASS_P (op0
))
3745 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
/* NOTE(review): else-branch marker dropped before 3747.  */
3747 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3751 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3753 "use not simple.\n");
3758 /* If op0 is an external or constant defs use a vector type of
3759 the same size as the output vector type. */
3761 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3763 gcc_assert (vectype_in
);
3766 if (dump_enabled_p ())
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3769 "no vectype for scalar type ");
3770 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3771 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Mixed boolean/non-boolean vector conversions are not supported.  */
3777 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3778 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3780 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3783 "can't convert between boolean and non "
3785 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3786 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Classify the conversion as NARROW / NONE / WIDEN from the unit counts.
   NOTE(review): the assignments to MODIFIER under each branch were
   dropped by the extraction.  */
3792 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3793 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3794 if (nunits_in
< nunits_out
)
3796 else if (nunits_out
== nunits_in
)
3801 /* Multiple types in SLP are handled by creating the appropriate number of
3802 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3804 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3806 else if (modifier
== NARROW
)
3807 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3809 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3811 /* Sanity check: make sure that at least one copy of the vectorized stmt
3812 needs to be generated. */
3813 gcc_assert (ncopies
>= 1);
3815 /* Supportable by target? */
/* NOTE(review): the switch/case structure on MODIFIER selecting the
   NONE / WIDEN / NARROW support checks below was dropped.  */
3819 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3821 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3828 "conversion not supported by target.\n");
3832 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3833 &code1
, &code2
, &multi_step_cvt
,
3836 /* Binary widening operation can only be supported directly by the
3838 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3842 if (code
!= FLOAT_EXPR
3843 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3844 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
/* Try int->float widening through progressively wider integer modes.  */
3847 rhs_mode
= TYPE_MODE (rhs_type
);
3848 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3849 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3850 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3851 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3855 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3856 if (cvt_type
== NULL_TREE
)
3859 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3861 if (!supportable_convert_operation (code
, vectype_out
,
3862 cvt_type
, &decl1
, &codecvt1
))
3865 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3866 cvt_type
, &codecvt1
,
3867 &codecvt2
, &multi_step_cvt
,
3871 gcc_assert (multi_step_cvt
== 0);
3873 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3874 vectype_in
, &code1
, &code2
,
3875 &multi_step_cvt
, &interm_types
))
3879 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3882 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3883 codecvt2
= ERROR_MARK
;
/* NOTE(review): else-branch bookkeeping lines (3884-3886) dropped.  */
3887 interm_types
.safe_push (cvt_type
);
3888 cvt_type
= NULL_TREE
;
/* NARROW support check: narrowing must be a unary conversion.  */
3893 gcc_assert (op_type
== unary_op
);
3894 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3895 &code1
, &multi_step_cvt
,
3899 if (code
!= FIX_TRUNC_EXPR
3900 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3901 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3904 rhs_mode
= TYPE_MODE (rhs_type
);
3906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3907 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3908 if (cvt_type
== NULL_TREE
)
3910 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3913 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3914 &code1
, &multi_step_cvt
,
/* Analysis-only path: record cost model info and return.  */
3923 if (!vec_stmt
) /* transformation not required. */
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_NOTE
, vect_location
,
3927 "=== vectorizable_conversion ===\n");
3928 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3930 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3931 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3933 else if (modifier
== NARROW
)
3935 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3936 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3940 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3941 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3943 interm_types
.release ();
/* Transformation phase starts here.  */
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE
, vect_location
,
3950 "transform conversion. ncopies = %d.\n", ncopies
);
3952 if (op_type
== binary_op
)
3954 if (CONSTANT_CLASS_P (op0
))
3955 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3956 else if (CONSTANT_CLASS_P (op1
))
3957 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3960 /* In case of multi-step conversion, we first generate conversion operations
3961 to the intermediate types, and then from that types to the final one.
3962 We create vector destinations for the intermediate type (TYPES) received
3963 from supportable_*_operation, and store them in the correct order
3964 for future use in vect_create_vectorized_*_stmts (). */
3965 vec_dsts
.create (multi_step_cvt
+ 1);
3966 vec_dest
= vect_create_destination_var (scalar_dest
,
3967 (cvt_type
&& modifier
== WIDEN
)
3968 ? cvt_type
: vectype_out
);
3969 vec_dsts
.quick_push (vec_dest
);
3973 for (i
= interm_types
.length () - 1;
3974 interm_types
.iterate (i
, &intermediate_type
); i
--)
3976 vec_dest
= vect_create_destination_var (scalar_dest
,
3978 vec_dsts
.quick_push (vec_dest
);
3983 vec_dest
= vect_create_destination_var (scalar_dest
,
3985 ? vectype_out
: cvt_type
);
/* Pre-size the operand vectors for the chosen modifier.  */
3989 if (modifier
== WIDEN
)
3991 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3992 if (op_type
== binary_op
)
3993 vec_oprnds1
.create (1);
3995 else if (modifier
== NARROW
)
3996 vec_oprnds0
.create (
3997 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3999 else if (code
== WIDEN_LSHIFT_EXPR
)
4000 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4003 prev_stmt_info
= NULL
;
/* NOTE(review): the case/if structure selecting between the NONE, WIDEN
   and NARROW code-generation paths was dropped by the extraction; the
   loop below appears to be the straight (modifier == NONE) path.  */
4007 for (j
= 0; j
< ncopies
; j
++)
4010 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
4013 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
4015 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4017 /* Arguments are ready, create the new vector stmt. */
4018 if (code1
== CALL_EXPR
)
4020 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4021 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4022 gimple_call_set_lhs (new_stmt
, new_temp
);
4026 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4027 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4028 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4029 gimple_assign_set_lhs (new_stmt
, new_temp
);
4032 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4034 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4037 if (!prev_stmt_info
)
4038 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4040 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4041 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* WIDEN path: unroll by VF/nunits and emit promotion stmt pairs.  */
4048 /* In case the vectorization factor (VF) is bigger than the number
4049 of elements that we can fit in a vectype (nunits), we have to
4050 generate more than one vector stmt - i.e - we need to "unroll"
4051 the vector stmt by a factor VF/nunits. */
4052 for (j
= 0; j
< ncopies
; j
++)
4059 if (code
== WIDEN_LSHIFT_EXPR
)
4064 /* Store vec_oprnd1 for every vector stmt to be created
4065 for SLP_NODE. We check during the analysis that all
4066 the shift arguments are the same. */
4067 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4068 vec_oprnds1
.quick_push (vec_oprnd1
);
4070 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4074 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
4075 &vec_oprnds1
, slp_node
, -1);
4079 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
);
4080 vec_oprnds0
.quick_push (vec_oprnd0
);
4081 if (op_type
== binary_op
)
4083 if (code
== WIDEN_LSHIFT_EXPR
)
4086 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
);
4087 vec_oprnds1
.quick_push (vec_oprnd1
);
4093 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
4094 vec_oprnds0
.truncate (0);
4095 vec_oprnds0
.quick_push (vec_oprnd0
);
4096 if (op_type
== binary_op
)
4098 if (code
== WIDEN_LSHIFT_EXPR
)
4101 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
4103 vec_oprnds1
.truncate (0);
4104 vec_oprnds1
.quick_push (vec_oprnd1
);
4108 /* Arguments are ready. Create the new vector stmts. */
4109 for (i
= multi_step_cvt
; i
>= 0; i
--)
4111 tree this_dest
= vec_dsts
[i
];
4112 enum tree_code c1
= code1
, c2
= code2
;
4113 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4118 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
4120 stmt
, this_dest
, gsi
,
4121 c1
, c2
, decl1
, decl2
,
4125 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4129 if (codecvt1
== CALL_EXPR
)
4131 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4132 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4133 gimple_call_set_lhs (new_stmt
, new_temp
);
4137 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4138 new_temp
= make_ssa_name (vec_dest
);
4139 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4143 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4146 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4149 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4152 if (!prev_stmt_info
)
4153 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
4155 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4156 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4161 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* NARROW path: gather 2^steps operands, then emit demotion stmts.  */
4165 /* In case the vectorization factor (VF) is bigger than the number
4166 of elements that we can fit in a vectype (nunits), we have to
4167 generate more than one vector stmt - i.e - we need to "unroll"
4168 the vector stmt by a factor VF/nunits. */
4169 for (j
= 0; j
< ncopies
; j
++)
4173 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4177 vec_oprnds0
.truncate (0);
4178 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
4179 vect_pow2 (multi_step_cvt
) - 1);
4182 /* Arguments are ready. Create the new vector stmts. */
4184 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4186 if (codecvt1
== CALL_EXPR
)
4188 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4189 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4190 gimple_call_set_lhs (new_stmt
, new_temp
);
4194 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4195 new_temp
= make_ssa_name (vec_dest
);
4196 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4200 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4201 vec_oprnds0
[i
] = new_temp
;
4204 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4205 stmt
, vec_dsts
, gsi
,
4210 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release temporary vectors before returning.  */
4214 vec_oprnds0
.release ();
4215 vec_oprnds1
.release ();
4216 vec_dsts
.release ();
4217 interm_types
.release ();
/* NOTE(review): text damaged by extraction -- split statements, fused
   source line numbers (42xx-43xx), dropped lines (return type, braces,
   else-branches, some conditions).  Comments describe intent only; the
   text is not compilable as-is.  */
4223 /* Function vectorizable_assignment.
4225 Check if STMT performs an assignment (copy) that can be vectorized.
4226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4227 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4231 vectorizable_assignment (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4232 gimple
**vec_stmt
, slp_tree slp_node
)
4237 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4238 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4241 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4244 vec
<tree
> vec_oprnds
= vNULL
;
4246 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4247 vec_info
*vinfo
= stmt_info
->vinfo
;
4248 gimple
*new_stmt
= NULL
;
4249 stmt_vec_info prev_stmt_info
= NULL
;
4250 enum tree_code code
;
4253 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4256 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4260 /* Is vectorizable assignment? */
4261 if (!is_gimple_assign (stmt
))
4264 scalar_dest
= gimple_assign_lhs (stmt
);
4265 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
/* Accept plain copies, PAREN_EXPR, and conversion codes only.  */
4268 code
= gimple_assign_rhs_code (stmt
);
4269 if (gimple_assign_single_p (stmt
)
4270 || code
== PAREN_EXPR
4271 || CONVERT_EXPR_CODE_P (code
))
4272 op
= gimple_assign_rhs1 (stmt
);
4276 if (code
== VIEW_CONVERT_EXPR
)
4277 op
= TREE_OPERAND (op
, 0);
4279 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4280 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4282 /* Multiple types in SLP are handled by creating the appropriate number of
4283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4285 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4288 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4290 gcc_assert (ncopies
>= 1);
4292 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4296 "use not simple.\n");
4300 /* We can handle NOP_EXPR conversions that do not change the number
4301 of elements or the vector size. */
4302 if ((CONVERT_EXPR_CODE_P (code
)
4303 || code
== VIEW_CONVERT_EXPR
)
4305 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4306 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4307 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4310 /* We do not handle bit-precision changes. */
4311 if ((CONVERT_EXPR_CODE_P (code
)
4312 || code
== VIEW_CONVERT_EXPR
)
4313 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4314 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4315 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4316 || ((TYPE_PRECISION (TREE_TYPE (op
))
4317 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4318 /* But a conversion that does not change the bit-pattern is ok. */
4319 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4320 > TYPE_PRECISION (TREE_TYPE (op
)))
4321 && TYPE_UNSIGNED (TREE_TYPE (op
)))
4322 /* Conversion between boolean types of different sizes is
4323 a simple assignment in case their vectypes are same
4325 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
4326 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4330 "type conversion to/from bit-precision "
/* Analysis-only path: record stmt type and cost, then return.  */
4335 if (!vec_stmt
) /* transformation not required. */
4337 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE
, vect_location
,
4340 "=== vectorizable_assignment ===\n");
4341 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation phase.  */
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4350 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4353 for (j
= 0; j
< ncopies
; j
++)
4357 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
/* NOTE(review): else-branch marker dropped; 4359 is the copy path for
   the second and later unrolled copies.  */
4359 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4361 /* Arguments are ready. create the new vector stmt. */
4362 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4364 if (CONVERT_EXPR_CODE_P (code
)
4365 || code
== VIEW_CONVERT_EXPR
)
4366 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4367 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4368 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4369 gimple_assign_set_lhs (new_stmt
, new_temp
);
4370 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4372 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4379 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4383 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4386 vec_oprnds
.release ();
/* NOTE(review): text damaged by extraction (split statements, fused line
   numbers, dropped lines such as the return type and return statements).
   Comments describe intent only.  */
4391 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4392 either as shift by a scalar or by a vector. */
4395 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4398 machine_mode vec_mode
;
4403 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer a vector-shifted-by-scalar optab; fall back to the
   vector-shifted-by-vector form when the scalar form is unavailable.  */
4407 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4409 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4411 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4413 || (optab_handler (optab
, TYPE_MODE (vectype
))
4414 == CODE_FOR_nothing
))
/* Reject when no insn pattern exists for the chosen optab.  */
4418 vec_mode
= TYPE_MODE (vectype
);
4419 icode
= (int) optab_handler (optab
, vec_mode
);
4420 if (icode
== CODE_FOR_nothing
)
4427 /* Function vectorizable_shift.
4429 Check if STMT performs a shift operation that can be vectorized.
4430 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4431 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4432 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4435 vectorizable_shift (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4436 gimple
**vec_stmt
, slp_tree slp_node
)
4440 tree op0
, op1
= NULL
;
4441 tree vec_oprnd1
= NULL_TREE
;
4442 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4444 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4445 enum tree_code code
;
4446 machine_mode vec_mode
;
4450 machine_mode optab_op2_mode
;
4452 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4453 gimple
*new_stmt
= NULL
;
4454 stmt_vec_info prev_stmt_info
;
4461 vec
<tree
> vec_oprnds0
= vNULL
;
4462 vec
<tree
> vec_oprnds1
= vNULL
;
4465 bool scalar_shift_arg
= true;
4466 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4467 vec_info
*vinfo
= stmt_info
->vinfo
;
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4473 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4477 /* Is STMT a vectorizable binary/unary operation? */
4478 if (!is_gimple_assign (stmt
))
4481 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4484 code
= gimple_assign_rhs_code (stmt
);
4486 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4487 || code
== RROTATE_EXPR
))
4490 scalar_dest
= gimple_assign_lhs (stmt
);
4491 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4492 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4493 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4497 "bit-precision shifts not supported.\n");
4501 op0
= gimple_assign_rhs1 (stmt
);
4502 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4506 "use not simple.\n");
4509 /* If op0 is an external or constant def use a vector type with
4510 the same size as the output vector type. */
4512 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4514 gcc_assert (vectype
);
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4519 "no vectype for scalar type\n");
4523 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4524 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4525 if (nunits_out
!= nunits_in
)
4528 op1
= gimple_assign_rhs2 (stmt
);
4529 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &op1_vectype
))
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4533 "use not simple.\n");
4538 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4542 /* Multiple types in SLP are handled by creating the appropriate number of
4543 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4545 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4548 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4550 gcc_assert (ncopies
>= 1);
4552 /* Determine whether the shift amount is a vector, or scalar. If the
4553 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4555 if ((dt
[1] == vect_internal_def
4556 || dt
[1] == vect_induction_def
)
4558 scalar_shift_arg
= false;
4559 else if (dt
[1] == vect_constant_def
4560 || dt
[1] == vect_external_def
4561 || dt
[1] == vect_internal_def
)
4563 /* In SLP, need to check whether the shift count is the same,
4564 in loops if it is a constant or invariant, it is always
4568 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4571 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4572 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4573 scalar_shift_arg
= false;
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4580 "operand mode requires invariant argument.\n");
4584 /* Vector shifted by vector. */
4585 if (!scalar_shift_arg
)
4587 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE
, vect_location
,
4590 "vector/vector shift/rotate found.\n");
4593 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4594 if (op1_vectype
== NULL_TREE
4595 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4599 "unusable type for last operand in"
4600 " vector/vector shift/rotate.\n");
4604 /* See if the machine has a vector shifted by scalar insn and if not
4605 then see if it has a vector shifted by vector insn. */
4608 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4610 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_NOTE
, vect_location
,
4614 "vector/scalar shift/rotate found.\n");
4618 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4620 && (optab_handler (optab
, TYPE_MODE (vectype
))
4621 != CODE_FOR_nothing
))
4623 scalar_shift_arg
= false;
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_NOTE
, vect_location
,
4627 "vector/vector shift/rotate found.\n");
4629 /* Unlike the other binary operators, shifts/rotates have
4630 the rhs being int, instead of the same type as the lhs,
4631 so make sure the scalar is the right type if we are
4632 dealing with vectors of long long/long/short/char. */
4633 if (dt
[1] == vect_constant_def
)
4634 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4635 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4639 && TYPE_MODE (TREE_TYPE (vectype
))
4640 != TYPE_MODE (TREE_TYPE (op1
)))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4644 "unusable type for last operand in"
4645 " vector/vector shift/rotate.\n");
4648 if (vec_stmt
&& !slp_node
)
4650 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4651 op1
= vect_init_vector (stmt
, op1
,
4652 TREE_TYPE (vectype
), NULL
);
4659 /* Supportable by target? */
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4667 vec_mode
= TYPE_MODE (vectype
);
4668 icode
= (int) optab_handler (optab
, vec_mode
);
4669 if (icode
== CODE_FOR_nothing
)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4673 "op not supported by target.\n");
4674 /* Check only during analysis. */
4675 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4676 || (vf
< vect_min_worthwhile_factor (code
)
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE
, vect_location
,
4681 "proceeding using word mode.\n");
4684 /* Worthwhile without SIMD support? Check only during analysis. */
4685 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4686 && vf
< vect_min_worthwhile_factor (code
)
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4691 "not worthwhile without SIMD support.\n");
4695 if (!vec_stmt
) /* transformation not required. */
4697 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_NOTE
, vect_location
,
4700 "=== vectorizable_shift ===\n");
4701 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_NOTE
, vect_location
,
4709 "transform binary/unary operation.\n");
4712 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4714 prev_stmt_info
= NULL
;
4715 for (j
= 0; j
< ncopies
; j
++)
4720 if (scalar_shift_arg
)
4722 /* Vector shl and shr insn patterns can be defined with scalar
4723 operand 2 (shift operand). In this case, use constant or loop
4724 invariant op1 directly, without extending it to vector mode
4726 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4727 if (!VECTOR_MODE_P (optab_op2_mode
))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE
, vect_location
,
4731 "operand 1 using scalar mode.\n");
4733 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4734 vec_oprnds1
.quick_push (vec_oprnd1
);
4737 /* Store vec_oprnd1 for every vector stmt to be created
4738 for SLP_NODE. We check during the analysis that all
4739 the shift arguments are the same.
4740 TODO: Allow different constants for different vector
4741 stmts generated for an SLP instance. */
4742 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4743 vec_oprnds1
.quick_push (vec_oprnd1
);
4748 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4749 (a special case for certain kind of vector shifts); otherwise,
4750 operand 1 should be of a vector type (the usual case). */
4752 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4755 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4759 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4761 /* Arguments are ready. Create the new vector stmt. */
4762 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4764 vop1
= vec_oprnds1
[i
];
4765 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4766 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4767 gimple_assign_set_lhs (new_stmt
, new_temp
);
4768 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4770 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4777 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4780 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4783 vec_oprnds0
.release ();
4784 vec_oprnds1
.release ();
4790 /* Function vectorizable_operation.
4792 Check if STMT performs a binary, unary or ternary operation that can
4794 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4795 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4796 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4799 vectorizable_operation (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
4800 gimple
**vec_stmt
, slp_tree slp_node
)
4804 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4805 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4807 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4808 enum tree_code code
;
4809 machine_mode vec_mode
;
4813 bool target_support_p
;
4815 enum vect_def_type dt
[3]
4816 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4817 gimple
*new_stmt
= NULL
;
4818 stmt_vec_info prev_stmt_info
;
4824 vec
<tree
> vec_oprnds0
= vNULL
;
4825 vec
<tree
> vec_oprnds1
= vNULL
;
4826 vec
<tree
> vec_oprnds2
= vNULL
;
4827 tree vop0
, vop1
, vop2
;
4828 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4829 vec_info
*vinfo
= stmt_info
->vinfo
;
4832 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4835 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4839 /* Is STMT a vectorizable binary/unary operation? */
4840 if (!is_gimple_assign (stmt
))
4843 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4846 code
= gimple_assign_rhs_code (stmt
);
4848 /* For pointer addition, we should use the normal plus for
4849 the vector addition. */
4850 if (code
== POINTER_PLUS_EXPR
)
4853 /* Support only unary or binary operations. */
4854 op_type
= TREE_CODE_LENGTH (code
);
4855 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4859 "num. args = %d (not unary/binary/ternary op).\n",
4864 scalar_dest
= gimple_assign_lhs (stmt
);
4865 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4867 /* Most operations cannot handle bit-precision types without extra
4869 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4870 && (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4871 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4872 /* Exception are bitwise binary operations. */
4873 && code
!= BIT_IOR_EXPR
4874 && code
!= BIT_XOR_EXPR
4875 && code
!= BIT_AND_EXPR
)
4877 if (dump_enabled_p ())
4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4879 "bit-precision arithmetic not supported.\n");
4883 op0
= gimple_assign_rhs1 (stmt
);
4884 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype
))
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4888 "use not simple.\n");
4891 /* If op0 is an external or constant def use a vector type with
4892 the same size as the output vector type. */
4895 /* For boolean type we cannot determine vectype by
4896 invariant value (don't know whether it is a vector
4897 of booleans or vector of integers). We use output
4898 vectype because operations on boolean don't change
4900 if (TREE_CODE (TREE_TYPE (op0
)) == BOOLEAN_TYPE
)
4902 if (TREE_CODE (TREE_TYPE (scalar_dest
)) != BOOLEAN_TYPE
)
4904 if (dump_enabled_p ())
4905 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4906 "not supported operation on bool value.\n");
4909 vectype
= vectype_out
;
4912 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4915 gcc_assert (vectype
);
4918 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4921 "no vectype for scalar type ");
4922 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4924 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4930 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4931 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4932 if (nunits_out
!= nunits_in
)
4935 if (op_type
== binary_op
|| op_type
== ternary_op
)
4937 op1
= gimple_assign_rhs2 (stmt
);
4938 if (!vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]))
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4942 "use not simple.\n");
4946 if (op_type
== ternary_op
)
4948 op2
= gimple_assign_rhs3 (stmt
);
4949 if (!vect_is_simple_use (op2
, vinfo
, &def_stmt
, &dt
[2]))
4951 if (dump_enabled_p ())
4952 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4953 "use not simple.\n");
4959 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4963 /* Multiple types in SLP are handled by creating the appropriate number of
4964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4966 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4969 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4971 gcc_assert (ncopies
>= 1);
4973 /* Shifts are handled in vectorizable_shift (). */
4974 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4975 || code
== RROTATE_EXPR
)
4978 /* Supportable by target? */
4980 vec_mode
= TYPE_MODE (vectype
);
4981 if (code
== MULT_HIGHPART_EXPR
)
4982 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
4985 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4993 target_support_p
= (optab_handler (optab
, vec_mode
)
4994 != CODE_FOR_nothing
);
4997 if (!target_support_p
)
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5001 "op not supported by target.\n");
5002 /* Check only during analysis. */
5003 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
5004 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE
, vect_location
,
5008 "proceeding using word mode.\n");
5011 /* Worthwhile without SIMD support? Check only during analysis. */
5012 if (!VECTOR_MODE_P (vec_mode
)
5014 && vf
< vect_min_worthwhile_factor (code
))
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5018 "not worthwhile without SIMD support.\n");
5022 if (!vec_stmt
) /* transformation not required. */
5024 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_NOTE
, vect_location
,
5027 "=== vectorizable_operation ===\n");
5028 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
5034 if (dump_enabled_p ())
5035 dump_printf_loc (MSG_NOTE
, vect_location
,
5036 "transform binary/unary operation.\n");
5039 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5041 /* In case the vectorization factor (VF) is bigger than the number
5042 of elements that we can fit in a vectype (nunits), we have to generate
5043 more than one vector stmt - i.e - we need to "unroll" the
5044 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5045 from one copy of the vector stmt to the next, in the field
5046 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5047 stages to find the correct vector defs to be used when vectorizing
5048 stmts that use the defs of the current stmt. The example below
5049 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5050 we need to create 4 vectorized stmts):
5052 before vectorization:
5053 RELATED_STMT VEC_STMT
5057 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5059 RELATED_STMT VEC_STMT
5060 VS1_0: vx0 = memref0 VS1_1 -
5061 VS1_1: vx1 = memref1 VS1_2 -
5062 VS1_2: vx2 = memref2 VS1_3 -
5063 VS1_3: vx3 = memref3 - -
5064 S1: x = load - VS1_0
5067 step2: vectorize stmt S2 (done here):
5068 To vectorize stmt S2 we first need to find the relevant vector
5069 def for the first operand 'x'. This is, as usual, obtained from
5070 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5071 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5072 relevant vector def 'vx0'. Having found 'vx0' we can generate
5073 the vector stmt VS2_0, and as usual, record it in the
5074 STMT_VINFO_VEC_STMT of stmt S2.
5075 When creating the second copy (VS2_1), we obtain the relevant vector
5076 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5077 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5078 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5079 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5080 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5081 chain of stmts and pointers:
5082 RELATED_STMT VEC_STMT
5083 VS1_0: vx0 = memref0 VS1_1 -
5084 VS1_1: vx1 = memref1 VS1_2 -
5085 VS1_2: vx2 = memref2 VS1_3 -
5086 VS1_3: vx3 = memref3 - -
5087 S1: x = load - VS1_0
5088 VS2_0: vz0 = vx0 + v1 VS2_1 -
5089 VS2_1: vz1 = vx1 + v1 VS2_2 -
5090 VS2_2: vz2 = vx2 + v1 VS2_3 -
5091 VS2_3: vz3 = vx3 + v1 - -
5092 S2: z = x + 1 - VS2_0 */
5094 prev_stmt_info
= NULL
;
5095 for (j
= 0; j
< ncopies
; j
++)
5100 if (op_type
== binary_op
|| op_type
== ternary_op
)
5101 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
5104 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
5106 if (op_type
== ternary_op
)
5108 vec_oprnds2
.create (1);
5109 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
5115 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
5116 if (op_type
== ternary_op
)
5118 tree vec_oprnd
= vec_oprnds2
.pop ();
5119 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
5124 /* Arguments are ready. Create the new vector stmt. */
5125 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5127 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5128 ? vec_oprnds1
[i
] : NULL_TREE
);
5129 vop2
= ((op_type
== ternary_op
)
5130 ? vec_oprnds2
[i
] : NULL_TREE
);
5131 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
5132 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5133 gimple_assign_set_lhs (new_stmt
, new_temp
);
5134 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5136 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5143 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5145 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5146 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5149 vec_oprnds0
.release ();
5150 vec_oprnds1
.release ();
5151 vec_oprnds2
.release ();
5156 /* A helper function to ensure data reference DR's base alignment
5160 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
5165 if (DR_VECT_AUX (dr
)->base_misaligned
)
5167 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5168 tree base_decl
= DR_VECT_AUX (dr
)->base_decl
;
5170 if (decl_in_symtab_p (base_decl
))
5171 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
5174 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
5175 DECL_USER_ALIGN (base_decl
) = 1;
5177 DR_VECT_AUX (dr
)->base_misaligned
= false;
5182 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5183 reversal of the vector elements. If that is impossible to do,
5187 perm_mask_for_reverse (tree vectype
)
5192 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5193 sel
= XALLOCAVEC (unsigned char, nunits
);
5195 for (i
= 0; i
< nunits
; ++i
)
5196 sel
[i
] = nunits
- 1 - i
;
5198 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5200 return vect_gen_perm_mask_checked (vectype
, sel
);
5203 /* Function vectorizable_store.
5205 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5208 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5212 vectorizable_store (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
5218 tree vec_oprnd
= NULL_TREE
;
5219 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5220 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5222 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5223 struct loop
*loop
= NULL
;
5224 machine_mode vec_mode
;
5226 enum dr_alignment_support alignment_support_scheme
;
5228 enum vect_def_type dt
;
5229 stmt_vec_info prev_stmt_info
= NULL
;
5230 tree dataref_ptr
= NULL_TREE
;
5231 tree dataref_offset
= NULL_TREE
;
5232 gimple
*ptr_incr
= NULL
;
5235 gimple
*next_stmt
, *first_stmt
= NULL
;
5236 bool grouped_store
= false;
5237 bool store_lanes_p
= false;
5238 unsigned int group_size
, i
;
5239 vec
<tree
> dr_chain
= vNULL
;
5240 vec
<tree
> oprnds
= vNULL
;
5241 vec
<tree
> result_chain
= vNULL
;
5243 bool negative
= false;
5244 tree offset
= NULL_TREE
;
5245 vec
<tree
> vec_oprnds
= vNULL
;
5246 bool slp
= (slp_node
!= NULL
);
5247 unsigned int vec_num
;
5248 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5249 vec_info
*vinfo
= stmt_info
->vinfo
;
5251 tree scatter_base
= NULL_TREE
, scatter_off
= NULL_TREE
;
5252 tree scatter_off_vectype
= NULL_TREE
, scatter_decl
= NULL_TREE
;
5253 int scatter_scale
= 1;
5254 enum vect_def_type scatter_idx_dt
= vect_unknown_def_type
;
5255 enum vect_def_type scatter_src_dt
= vect_unknown_def_type
;
5258 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5261 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5265 /* Is vectorizable store? */
5267 if (!is_gimple_assign (stmt
))
5270 scalar_dest
= gimple_assign_lhs (stmt
);
5271 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5272 && is_pattern_stmt_p (stmt_info
))
5273 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5274 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5275 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5276 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5277 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5278 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5279 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5280 && TREE_CODE (scalar_dest
) != MEM_REF
)
5283 gcc_assert (gimple_assign_single_p (stmt
));
5285 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5286 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5289 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5291 /* Multiple types in SLP are handled by creating the appropriate number of
5292 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5294 if (slp
|| PURE_SLP_STMT (stmt_info
))
5297 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5299 gcc_assert (ncopies
>= 1);
5301 /* FORNOW. This restriction should be relaxed. */
5302 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5306 "multiple types in nested loop.\n");
5310 op
= gimple_assign_rhs1 (stmt
);
5311 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5315 "use not simple.\n");
5319 elem_type
= TREE_TYPE (vectype
);
5320 vec_mode
= TYPE_MODE (vectype
);
5322 /* FORNOW. In some cases can vectorize even if data-type not supported
5323 (e.g. - array initialization with 0). */
5324 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5327 if (!STMT_VINFO_DATA_REF (stmt_info
))
5330 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5333 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5334 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5335 size_zero_node
) < 0;
5336 if (negative
&& ncopies
> 1)
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5340 "multiple types with negative step.\n");
5345 gcc_assert (!grouped_store
);
5346 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5347 if (alignment_support_scheme
!= dr_aligned
5348 && alignment_support_scheme
!= dr_unaligned_supported
)
5350 if (dump_enabled_p ())
5351 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5352 "negative step but alignment required.\n");
5355 if (dt
!= vect_constant_def
5356 && dt
!= vect_external_def
5357 && !perm_mask_for_reverse (vectype
))
5359 if (dump_enabled_p ())
5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5361 "negative step and reversing not supported.\n");
5367 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5369 grouped_store
= true;
5370 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5371 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5373 && !PURE_SLP_STMT (stmt_info
)
5374 && !STMT_VINFO_STRIDED_P (stmt_info
))
5376 if (vect_store_lanes_supported (vectype
, group_size
))
5377 store_lanes_p
= true;
5378 else if (!vect_grouped_store_supported (vectype
, group_size
))
5382 if (STMT_VINFO_STRIDED_P (stmt_info
)
5383 && (slp
|| PURE_SLP_STMT (stmt_info
))
5384 && (group_size
> nunits
5385 || nunits
% group_size
!= 0))
5387 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5388 "unhandled strided group store\n");
5392 if (first_stmt
== stmt
)
5394 /* STMT is the leader of the group. Check the operands of all the
5395 stmts of the group. */
5396 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5399 gcc_assert (gimple_assign_single_p (next_stmt
));
5400 op
= gimple_assign_rhs1 (next_stmt
);
5401 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
))
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5405 "use not simple.\n");
5408 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5413 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5416 scatter_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &scatter_base
,
5417 &scatter_off
, &scatter_scale
);
5418 gcc_assert (scatter_decl
);
5419 if (!vect_is_simple_use (scatter_off
, vinfo
, &def_stmt
, &scatter_idx_dt
,
5420 &scatter_off_vectype
))
5422 if (dump_enabled_p ())
5423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5424 "scatter index use not simple.");
5429 if (!vec_stmt
) /* transformation not required. */
5431 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5432 /* The SLP costs are calculated during SLP analysis. */
5433 if (!PURE_SLP_STMT (stmt_info
))
5434 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5441 ensure_base_align (stmt_info
, dr
);
5443 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5445 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, op
, src
;
5446 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (scatter_decl
));
5447 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5448 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
5449 edge pe
= loop_preheader_edge (loop
);
5452 enum { NARROW
, NONE
, WIDEN
} modifier
;
5453 int scatter_off_nunits
= TYPE_VECTOR_SUBPARTS (scatter_off_vectype
);
5455 if (nunits
== (unsigned int) scatter_off_nunits
)
5457 else if (nunits
== (unsigned int) scatter_off_nunits
/ 2)
5459 unsigned char *sel
= XALLOCAVEC (unsigned char, scatter_off_nunits
);
5462 for (i
= 0; i
< (unsigned int) scatter_off_nunits
; ++i
)
5463 sel
[i
] = i
| nunits
;
5465 perm_mask
= vect_gen_perm_mask_checked (scatter_off_vectype
, sel
);
5466 gcc_assert (perm_mask
!= NULL_TREE
);
5468 else if (nunits
== (unsigned int) scatter_off_nunits
* 2)
5470 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5473 for (i
= 0; i
< (unsigned int) nunits
; ++i
)
5474 sel
[i
] = i
| scatter_off_nunits
;
5476 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
5477 gcc_assert (perm_mask
!= NULL_TREE
);
5483 rettype
= TREE_TYPE (TREE_TYPE (scatter_decl
));
5484 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5485 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5486 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5487 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5488 scaletype
= TREE_VALUE (arglist
);
5490 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
5491 && TREE_CODE (rettype
) == VOID_TYPE
);
5493 ptr
= fold_convert (ptrtype
, scatter_base
);
5494 if (!is_gimple_min_invariant (ptr
))
5496 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5497 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5498 gcc_assert (!new_bb
);
5501 /* Currently we support only unconditional scatter stores,
5502 so mask should be all ones. */
5503 mask
= build_int_cst (masktype
, -1);
5504 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5506 scale
= build_int_cst (scaletype
, scatter_scale
);
5508 prev_stmt_info
= NULL
;
5509 for (j
= 0; j
< ncopies
; ++j
)
5514 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt
), stmt
);
5516 = vect_get_vec_def_for_operand (scatter_off
, stmt
);
5518 else if (modifier
!= NONE
&& (j
& 1))
5520 if (modifier
== WIDEN
)
5523 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5524 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
5527 else if (modifier
== NARROW
)
5529 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
5532 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5540 = vect_get_vec_def_for_stmt_copy (scatter_src_dt
, vec_oprnd1
);
5542 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt
, vec_oprnd0
);
5545 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
5547 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
))
5548 == TYPE_VECTOR_SUBPARTS (srctype
));
5549 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
5550 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
5551 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
5552 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5556 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5558 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5559 == TYPE_VECTOR_SUBPARTS (idxtype
));
5560 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
5561 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5562 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
5563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5568 = gimple_build_call (scatter_decl
, 5, ptr
, mask
, op
, src
, scale
);
5570 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5572 if (prev_stmt_info
== NULL
)
5573 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5575 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5576 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5583 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5584 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5586 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5589 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5591 /* We vectorize all the stmts of the interleaving group when we
5592 reach the last stmt in the group. */
5593 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5594 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5603 grouped_store
= false;
5604 /* VEC_NUM is the number of vect stmts to be created for this
5606 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5607 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5608 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt
)) == first_stmt
);
5609 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5610 op
= gimple_assign_rhs1 (first_stmt
);
5613 /* VEC_NUM is the number of vect stmts to be created for this
5615 vec_num
= group_size
;
5621 group_size
= vec_num
= 1;
5624 if (dump_enabled_p ())
5625 dump_printf_loc (MSG_NOTE
, vect_location
,
5626 "transform store. ncopies = %d\n", ncopies
);
5628 if (STMT_VINFO_STRIDED_P (stmt_info
))
5630 gimple_stmt_iterator incr_gsi
;
5636 gimple_seq stmts
= NULL
;
5637 tree stride_base
, stride_step
, alias_off
;
5641 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5644 = fold_build_pointer_plus
5645 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5646 size_binop (PLUS_EXPR
,
5647 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5648 convert_to_ptrofftype (DR_INIT(first_dr
))));
5649 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
5651 /* For a store with loop-invariant (but other than power-of-2)
5652 stride (i.e. not a grouped access) like so:
5654 for (i = 0; i < n; i += stride)
5657 we generate a new induction variable and new stores from
5658 the components of the (vectorized) rhs:
5660 for (j = 0; ; j += VF*stride)
5665 array[j + stride] = tmp2;
5669 unsigned nstores
= nunits
;
5670 tree ltype
= elem_type
;
5673 nstores
= nunits
/ group_size
;
5674 if (group_size
< nunits
)
5675 ltype
= build_vector_type (elem_type
, group_size
);
5678 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5679 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5683 ivstep
= stride_step
;
5684 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5685 build_int_cst (TREE_TYPE (ivstep
),
5686 ncopies
* nstores
));
5688 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5690 create_iv (stride_base
, ivstep
, NULL
,
5691 loop
, &incr_gsi
, insert_after
,
5693 incr
= gsi_stmt (incr_gsi
);
5694 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
5696 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5698 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5700 prev_stmt_info
= NULL
;
5701 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5702 next_stmt
= first_stmt
;
5703 for (g
= 0; g
< group_size
; g
++)
5705 running_off
= offvar
;
5708 tree size
= TYPE_SIZE_UNIT (ltype
);
5709 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5711 tree newoff
= copy_ssa_name (running_off
, NULL
);
5712 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5714 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5715 running_off
= newoff
;
5717 for (j
= 0; j
< ncopies
; j
++)
5719 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5720 and first_stmt == stmt. */
5725 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5727 vec_oprnd
= vec_oprnds
[0];
5731 gcc_assert (gimple_assign_single_p (next_stmt
));
5732 op
= gimple_assign_rhs1 (next_stmt
);
5733 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5739 vec_oprnd
= vec_oprnds
[j
];
5742 vect_is_simple_use (vec_oprnd
, vinfo
, &def_stmt
, &dt
);
5743 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5747 for (i
= 0; i
< nstores
; i
++)
5749 tree newref
, newoff
;
5750 gimple
*incr
, *assign
;
5751 tree size
= TYPE_SIZE (ltype
);
5752 /* Extract the i'th component. */
5753 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5754 bitsize_int (i
), size
);
5755 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5758 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5762 newref
= build2 (MEM_REF
, ltype
,
5763 running_off
, alias_off
);
5765 /* And store it to *running_off. */
5766 assign
= gimple_build_assign (newref
, elem
);
5767 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5769 newoff
= copy_ssa_name (running_off
, NULL
);
5770 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5771 running_off
, stride_step
);
5772 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5774 running_off
= newoff
;
5775 if (g
== group_size
- 1
5778 if (j
== 0 && i
== 0)
5779 STMT_VINFO_VEC_STMT (stmt_info
)
5780 = *vec_stmt
= assign
;
5782 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5783 prev_stmt_info
= vinfo_for_stmt (assign
);
5787 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5792 dr_chain
.create (group_size
);
5793 oprnds
.create (group_size
);
5795 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5796 gcc_assert (alignment_support_scheme
);
5797 /* Targets with store-lane instructions must not require explicit
5799 gcc_assert (!store_lanes_p
5800 || alignment_support_scheme
== dr_aligned
5801 || alignment_support_scheme
== dr_unaligned_supported
);
5804 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5807 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5809 aggr_type
= vectype
;
5811 /* In case the vectorization factor (VF) is bigger than the number
5812 of elements that we can fit in a vectype (nunits), we have to generate
5813 more than one vector stmt - i.e - we need to "unroll" the
5814 vector stmt by a factor VF/nunits. For more details see documentation in
5815 vect_get_vec_def_for_copy_stmt. */
5817 /* In case of interleaving (non-unit grouped access):
5824 We create vectorized stores starting from base address (the access of the
5825 first stmt in the chain (S2 in the above example), when the last store stmt
5826 of the chain (S4) is reached:
5829 VS2: &base + vec_size*1 = vx0
5830 VS3: &base + vec_size*2 = vx1
5831 VS4: &base + vec_size*3 = vx3
5833 Then permutation statements are generated:
5835 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5836 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5839 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5840 (the order of the data-refs in the output of vect_permute_store_chain
5841 corresponds to the order of scalar stmts in the interleaving chain - see
5842 the documentation of vect_permute_store_chain()).
5844 In case of both multiple types and interleaving, above vector stores and
5845 permutation stmts are created for every copy. The result vector stmts are
5846 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5847 STMT_VINFO_RELATED_STMT for the next copies.
5850 prev_stmt_info
= NULL
;
5851 for (j
= 0; j
< ncopies
; j
++)
5858 /* Get vectorized arguments for SLP_NODE. */
5859 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5860 NULL
, slp_node
, -1);
5862 vec_oprnd
= vec_oprnds
[0];
5866 /* For interleaved stores we collect vectorized defs for all the
5867 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5868 used as an input to vect_permute_store_chain(), and OPRNDS as
5869 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5871 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5872 OPRNDS are of size 1. */
5873 next_stmt
= first_stmt
;
5874 for (i
= 0; i
< group_size
; i
++)
5876 /* Since gaps are not supported for interleaved stores,
5877 GROUP_SIZE is the exact number of stmts in the chain.
5878 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5879 there is no interleaving, GROUP_SIZE is 1, and only one
5880 iteration of the loop will be executed. */
5881 gcc_assert (next_stmt
5882 && gimple_assign_single_p (next_stmt
));
5883 op
= gimple_assign_rhs1 (next_stmt
);
5885 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
);
5886 dr_chain
.quick_push (vec_oprnd
);
5887 oprnds
.quick_push (vec_oprnd
);
5888 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5892 /* We should have catched mismatched types earlier. */
5893 gcc_assert (useless_type_conversion_p (vectype
,
5894 TREE_TYPE (vec_oprnd
)));
5895 bool simd_lane_access_p
5896 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5897 if (simd_lane_access_p
5898 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5899 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5900 && integer_zerop (DR_OFFSET (first_dr
))
5901 && integer_zerop (DR_INIT (first_dr
))
5902 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5903 get_alias_set (DR_REF (first_dr
))))
5905 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5906 dataref_offset
= build_int_cst (reference_alias_ptr_type
5907 (DR_REF (first_dr
)), 0);
5912 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5913 simd_lane_access_p
? loop
: NULL
,
5914 offset
, &dummy
, gsi
, &ptr_incr
,
5915 simd_lane_access_p
, &inv_p
);
5916 gcc_assert (bb_vinfo
|| !inv_p
);
5920 /* For interleaved stores we created vectorized defs for all the
5921 defs stored in OPRNDS in the previous iteration (previous copy).
5922 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5923 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5925 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5926 OPRNDS are of size 1. */
5927 for (i
= 0; i
< group_size
; i
++)
5930 vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
);
5931 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5932 dr_chain
[i
] = vec_oprnd
;
5933 oprnds
[i
] = vec_oprnd
;
5937 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5938 TYPE_SIZE_UNIT (aggr_type
));
5940 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5941 TYPE_SIZE_UNIT (aggr_type
));
5948 /* Combine all the vectors into an array. */
5949 vec_array
= create_vector_array (vectype
, vec_num
);
5950 for (i
= 0; i
< vec_num
; i
++)
5952 vec_oprnd
= dr_chain
[i
];
5953 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5957 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5958 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5959 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5960 gimple_call_set_lhs (new_stmt
, data_ref
);
5961 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5969 result_chain
.create (group_size
);
5971 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5975 next_stmt
= first_stmt
;
5976 for (i
= 0; i
< vec_num
; i
++)
5978 unsigned align
, misalign
;
5981 /* Bump the vector pointer. */
5982 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5986 vec_oprnd
= vec_oprnds
[i
];
5987 else if (grouped_store
)
5988 /* For grouped stores vectorized defs are interleaved in
5989 vect_permute_store_chain(). */
5990 vec_oprnd
= result_chain
[i
];
5992 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5996 : build_int_cst (reference_alias_ptr_type
5997 (DR_REF (first_dr
)), 0));
5998 align
= TYPE_ALIGN_UNIT (vectype
);
5999 if (aligned_access_p (first_dr
))
6001 else if (DR_MISALIGNMENT (first_dr
) == -1)
6003 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
6004 align
= TYPE_ALIGN_UNIT (elem_type
);
6006 align
= get_object_alignment (DR_REF (first_dr
))
6009 TREE_TYPE (data_ref
)
6010 = build_aligned_type (TREE_TYPE (data_ref
),
6011 align
* BITS_PER_UNIT
);
6015 TREE_TYPE (data_ref
)
6016 = build_aligned_type (TREE_TYPE (data_ref
),
6017 TYPE_ALIGN (elem_type
));
6018 misalign
= DR_MISALIGNMENT (first_dr
);
6020 if (dataref_offset
== NULL_TREE
6021 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6022 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
6026 && dt
!= vect_constant_def
6027 && dt
!= vect_external_def
)
6029 tree perm_mask
= perm_mask_for_reverse (vectype
);
6031 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
6033 tree new_temp
= make_ssa_name (perm_dest
);
6035 /* Generate the permute statement. */
6037 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
6038 vec_oprnd
, perm_mask
);
6039 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
6041 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6042 vec_oprnd
= new_temp
;
6045 /* Arguments are ready. Create the new vector stmt. */
6046 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
6047 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6052 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
6060 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6062 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6063 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6067 dr_chain
.release ();
6069 result_chain
.release ();
6070 vec_oprnds
.release ();
6075 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6076 VECTOR_CST mask. No checks are made that the target platform supports the
6077 mask, so callers may wish to test can_vec_perm_p separately, or use
6078 vect_gen_perm_mask_checked. */
6081 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
6083 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
6086 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6088 mask_elt_type
= lang_hooks
.types
.type_for_mode
6089 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
6090 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
6092 mask_elts
= XALLOCAVEC (tree
, nunits
);
6093 for (i
= nunits
- 1; i
>= 0; i
--)
6094 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
6095 mask_vec
= build_vector (mask_type
, mask_elts
);
6100 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6101 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6104 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
6106 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
6107 return vect_gen_perm_mask_any (vectype
, sel
);
6110 /* Given a vector variable X and Y, that was generated for the scalar
6111 STMT, generate instructions to permute the vector elements of X and Y
6112 using permutation mask MASK_VEC, insert them at *GSI and return the
6113 permuted vector variable. */
6116 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple
*stmt
,
6117 gimple_stmt_iterator
*gsi
)
6119 tree vectype
= TREE_TYPE (x
);
6120 tree perm_dest
, data_ref
;
6123 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
6124 data_ref
= make_ssa_name (perm_dest
);
6126 /* Generate the permute statement. */
6127 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
6128 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loops preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  /* Whether at least one use of STMT is defined inside LOOP and thus
     actually needs hoisting.  */
  bool any = false;

  /* First pass: check that every def of a use of STMT that lives inside
     LOOP can be hoisted on its own, i.e. none of its own uses are
     defined inside LOOP.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  /* PHIs cannot be moved to the preheader edge.  */
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  /* Nothing defined inside LOOP - nothing to move.  */
  if (!any)
    return true;

  /* Second pass: actually move each loop-resident def onto the
     preheader edge of LOOP.  */
  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}
6188 /* vectorizable_load.
6190 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6197 vectorizable_load (gimple
*stmt
, gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
6198 slp_tree slp_node
, slp_instance slp_node_instance
)
6201 tree vec_dest
= NULL
;
6202 tree data_ref
= NULL
;
6203 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6204 stmt_vec_info prev_stmt_info
;
6205 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6206 struct loop
*loop
= NULL
;
6207 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
6208 bool nested_in_vect_loop
= false;
6209 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
6213 gimple
*new_stmt
= NULL
;
6215 enum dr_alignment_support alignment_support_scheme
;
6216 tree dataref_ptr
= NULL_TREE
;
6217 tree dataref_offset
= NULL_TREE
;
6218 gimple
*ptr_incr
= NULL
;
6220 int i
, j
, group_size
= -1, group_gap_adj
;
6221 tree msq
= NULL_TREE
, lsq
;
6222 tree offset
= NULL_TREE
;
6223 tree byte_offset
= NULL_TREE
;
6224 tree realignment_token
= NULL_TREE
;
6226 vec
<tree
> dr_chain
= vNULL
;
6227 bool grouped_load
= false;
6228 bool load_lanes_p
= false;
6230 gimple
*first_stmt_for_drptr
= NULL
;
6232 bool negative
= false;
6233 bool compute_in_loop
= false;
6234 struct loop
*at_loop
;
6236 bool slp
= (slp_node
!= NULL
);
6237 bool slp_perm
= false;
6238 enum tree_code code
;
6239 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6242 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
6243 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
6244 int gather_scale
= 1;
6245 enum vect_def_type gather_dt
= vect_unknown_def_type
;
6246 vec_info
*vinfo
= stmt_info
->vinfo
;
6248 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6251 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6255 /* Is vectorizable load? */
6256 if (!is_gimple_assign (stmt
))
6259 scalar_dest
= gimple_assign_lhs (stmt
);
6260 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
6263 code
= gimple_assign_rhs_code (stmt
);
6264 if (code
!= ARRAY_REF
6265 && code
!= BIT_FIELD_REF
6266 && code
!= INDIRECT_REF
6267 && code
!= COMPONENT_REF
6268 && code
!= IMAGPART_EXPR
6269 && code
!= REALPART_EXPR
6271 && TREE_CODE_CLASS (code
) != tcc_declaration
)
6274 if (!STMT_VINFO_DATA_REF (stmt_info
))
6277 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6278 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6282 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6283 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
6284 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6289 /* Multiple types in SLP are handled by creating the appropriate number of
6290 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6292 if (slp
|| PURE_SLP_STMT (stmt_info
))
6295 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6297 gcc_assert (ncopies
>= 1);
6299 /* FORNOW. This restriction should be relaxed. */
6300 if (nested_in_vect_loop
&& ncopies
> 1)
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6304 "multiple types in nested loop.\n");
6308 /* Invalidate assumptions made by dependence analysis when vectorization
6309 on the unrolled body effectively re-orders stmts. */
6311 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6312 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6313 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6315 if (dump_enabled_p ())
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6317 "cannot perform implicit CSE when unrolling "
6318 "with negative dependence distance\n");
6322 elem_type
= TREE_TYPE (vectype
);
6323 mode
= TYPE_MODE (vectype
);
6325 /* FORNOW. In some cases can vectorize even if data-type not supported
6326 (e.g. - data copies). */
6327 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6331 "Aligned load, but unsupported type.\n");
6335 /* Check if the load is a part of an interleaving chain. */
6336 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6338 grouped_load
= true;
6340 gcc_assert (!nested_in_vect_loop
&& !STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
6342 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6344 /* If this is single-element interleaving with an element distance
6345 that leaves unused vector loads around punt - we at least create
6346 very sub-optimal code in that case (and blow up memory,
6348 bool force_peeling
= false;
6349 if (first_stmt
== stmt
6350 && !GROUP_NEXT_ELEMENT (stmt_info
))
6352 if (GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6356 "single-element interleaving not supported "
6357 "for not adjacent vector loads\n");
6361 /* Single-element interleaving requires peeling for gaps. */
6362 force_peeling
= true;
6365 /* If there is a gap in the end of the group or the group size cannot
6366 be made a multiple of the vector element count then we access excess
6367 elements in the last iteration and thus need to peel that off. */
6369 && ! STMT_VINFO_STRIDED_P (stmt_info
)
6371 || GROUP_GAP (vinfo_for_stmt (first_stmt
)) != 0
6372 || (!slp
&& vf
% GROUP_SIZE (vinfo_for_stmt (first_stmt
)) != 0)))
6374 if (dump_enabled_p ())
6375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6376 "Data access with gaps requires scalar "
6380 if (dump_enabled_p ())
6381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6382 "Peeling for outer loop is not supported\n");
6386 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
6389 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6392 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6394 && !PURE_SLP_STMT (stmt_info
)
6395 && !STMT_VINFO_STRIDED_P (stmt_info
))
6397 if (vect_load_lanes_supported (vectype
, group_size
))
6398 load_lanes_p
= true;
6399 else if (!vect_grouped_load_supported (vectype
, group_size
))
6403 /* Invalidate assumptions made by dependence analysis when vectorization
6404 on the unrolled body effectively re-orders stmts. */
6405 if (!PURE_SLP_STMT (stmt_info
)
6406 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6407 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6408 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6412 "cannot perform implicit CSE when performing "
6413 "group loads with negative dependence distance\n");
6417 /* Similarly when the stmt is a load that is both part of a SLP
6418 instance and a loop vectorized stmt via the same-dr mechanism
6419 we have to give up. */
6420 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6421 && (STMT_SLP_TYPE (stmt_info
)
6422 != STMT_SLP_TYPE (vinfo_for_stmt
6423 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6427 "conflicting SLP types for CSEd load\n");
6433 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6436 gather_decl
= vect_check_gather_scatter (stmt
, loop_vinfo
, &gather_base
,
6437 &gather_off
, &gather_scale
);
6438 gcc_assert (gather_decl
);
6439 if (!vect_is_simple_use (gather_off
, vinfo
, &def_stmt
, &gather_dt
,
6440 &gather_off_vectype
))
6442 if (dump_enabled_p ())
6443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6444 "gather index use not simple.\n");
6448 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6451 && (slp
|| PURE_SLP_STMT (stmt_info
)))
6452 && (group_size
> nunits
6453 || nunits
% group_size
!= 0))
6455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6456 "unhandled strided group load\n");
6462 negative
= tree_int_cst_compare (nested_in_vect_loop
6463 ? STMT_VINFO_DR_STEP (stmt_info
)
6465 size_zero_node
) < 0;
6466 if (negative
&& ncopies
> 1)
6468 if (dump_enabled_p ())
6469 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6470 "multiple types with negative step.\n");
6478 if (dump_enabled_p ())
6479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6480 "negative step for group load not supported"
6484 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6485 if (alignment_support_scheme
!= dr_aligned
6486 && alignment_support_scheme
!= dr_unaligned_supported
)
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6490 "negative step but alignment required.\n");
6493 if (!perm_mask_for_reverse (vectype
))
6495 if (dump_enabled_p ())
6496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6497 "negative step and reversing not supported."
6504 if (!vec_stmt
) /* transformation not required. */
6506 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6507 /* The SLP costs are calculated during SLP analysis. */
6508 if (!PURE_SLP_STMT (stmt_info
))
6509 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6514 if (dump_enabled_p ())
6515 dump_printf_loc (MSG_NOTE
, vect_location
,
6516 "transform load. ncopies = %d\n", ncopies
);
6520 ensure_base_align (stmt_info
, dr
);
6522 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6524 tree vec_oprnd0
= NULL_TREE
, op
;
6525 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6526 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6527 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6528 edge pe
= loop_preheader_edge (loop
);
6531 enum { NARROW
, NONE
, WIDEN
} modifier
;
6532 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6534 if (nunits
== gather_off_nunits
)
6536 else if (nunits
== gather_off_nunits
/ 2)
6538 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6541 for (i
= 0; i
< gather_off_nunits
; ++i
)
6542 sel
[i
] = i
| nunits
;
6544 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6546 else if (nunits
== gather_off_nunits
* 2)
6548 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6551 for (i
= 0; i
< nunits
; ++i
)
6552 sel
[i
] = i
< gather_off_nunits
6553 ? i
: i
+ nunits
- gather_off_nunits
;
6555 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6561 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6562 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6563 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6564 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6565 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6566 scaletype
= TREE_VALUE (arglist
);
6567 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6569 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6571 ptr
= fold_convert (ptrtype
, gather_base
);
6572 if (!is_gimple_min_invariant (ptr
))
6574 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6575 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6576 gcc_assert (!new_bb
);
6579 /* Currently we support only unconditional gather loads,
6580 so mask should be all ones. */
6581 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6582 mask
= build_int_cst (masktype
, -1);
6583 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6585 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6586 mask
= build_vector_from_val (masktype
, mask
);
6587 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6589 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6593 for (j
= 0; j
< 6; ++j
)
6595 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6596 mask
= build_real (TREE_TYPE (masktype
), r
);
6597 mask
= build_vector_from_val (masktype
, mask
);
6598 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6603 scale
= build_int_cst (scaletype
, gather_scale
);
6605 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6606 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6607 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6611 for (j
= 0; j
< 6; ++j
)
6613 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6614 merge
= build_real (TREE_TYPE (rettype
), r
);
6618 merge
= build_vector_from_val (rettype
, merge
);
6619 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6621 prev_stmt_info
= NULL
;
6622 for (j
= 0; j
< ncopies
; ++j
)
6624 if (modifier
== WIDEN
&& (j
& 1))
6625 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6626 perm_mask
, stmt
, gsi
);
6629 = vect_get_vec_def_for_operand (gather_off
, stmt
);
6632 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6634 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6636 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6637 == TYPE_VECTOR_SUBPARTS (idxtype
));
6638 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6639 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6641 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6642 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6647 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6649 if (!useless_type_conversion_p (vectype
, rettype
))
6651 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6652 == TYPE_VECTOR_SUBPARTS (rettype
));
6653 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
6654 gimple_call_set_lhs (new_stmt
, op
);
6655 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6656 var
= make_ssa_name (vec_dest
);
6657 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6659 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6663 var
= make_ssa_name (vec_dest
, new_stmt
);
6664 gimple_call_set_lhs (new_stmt
, var
);
6667 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6669 if (modifier
== NARROW
)
6676 var
= permute_vec_elements (prev_res
, var
,
6677 perm_mask
, stmt
, gsi
);
6678 new_stmt
= SSA_NAME_DEF_STMT (var
);
6681 if (prev_stmt_info
== NULL
)
6682 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6684 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6685 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6689 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6691 gimple_stmt_iterator incr_gsi
;
6697 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6698 gimple_seq stmts
= NULL
;
6699 tree stride_base
, stride_step
, alias_off
;
6701 gcc_assert (!nested_in_vect_loop
);
6703 if (slp
&& grouped_load
)
6704 first_dr
= STMT_VINFO_DATA_REF
6705 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6710 = fold_build_pointer_plus
6711 (DR_BASE_ADDRESS (first_dr
),
6712 size_binop (PLUS_EXPR
,
6713 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6714 convert_to_ptrofftype (DR_INIT (first_dr
))));
6715 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
6717 /* For a load with loop-invariant (but other than power-of-2)
6718 stride (i.e. not a grouped access) like so:
6720 for (i = 0; i < n; i += stride)
6723 we generate a new induction variable and new accesses to
6724 form a new vector (or vectors, depending on ncopies):
6726 for (j = 0; ; j += VF*stride)
6728 tmp2 = array[j + stride];
6730 vectemp = {tmp1, tmp2, ...}
6733 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6734 build_int_cst (TREE_TYPE (stride_step
), vf
));
6736 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6738 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6739 loop
, &incr_gsi
, insert_after
,
6741 incr
= gsi_stmt (incr_gsi
);
6742 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
));
6744 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6745 &stmts
, true, NULL_TREE
);
6747 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6749 prev_stmt_info
= NULL
;
6750 running_off
= offvar
;
6751 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6752 int nloads
= nunits
;
6753 tree ltype
= TREE_TYPE (vectype
);
6754 auto_vec
<tree
> dr_chain
;
6757 nloads
= nunits
/ group_size
;
6758 if (group_size
< nunits
)
6759 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6762 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6763 /* For SLP permutation support we need to load the whole group,
6764 not only the number of vector stmts the permutation result
6768 ncopies
= (group_size
* vf
+ nunits
- 1) / nunits
;
6769 dr_chain
.create (ncopies
);
6772 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6774 for (j
= 0; j
< ncopies
; j
++)
6780 vec_alloc (v
, nloads
);
6781 for (i
= 0; i
< nloads
; i
++)
6783 tree newref
, newoff
;
6785 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6787 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6790 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6791 newoff
= copy_ssa_name (running_off
);
6792 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6793 running_off
, stride_step
);
6794 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6796 running_off
= newoff
;
6799 vec_inv
= build_constructor (vectype
, v
);
6800 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6801 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6805 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6806 build2 (MEM_REF
, ltype
,
6807 running_off
, alias_off
));
6808 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6810 tree newoff
= copy_ssa_name (running_off
);
6811 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6812 running_off
, stride_step
);
6813 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6815 running_off
= newoff
;
6821 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6823 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6828 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6830 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6831 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6835 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6836 slp_node_instance
, false);
6842 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6843 /* For SLP vectorization we directly vectorize a subchain
6844 without permutation. */
6845 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6846 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6847 /* For BB vectorization always use the first stmt to base
6848 the data ref pointer on. */
6850 first_stmt_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6852 /* Check if the chain of loads is already vectorized. */
6853 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6854 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6855 ??? But we can only do so if there is exactly one
6856 as we have no way to get at the rest. Leave the CSE
6858 ??? With the group load eventually participating
6859 in multiple different permutations (having multiple
6860 slp nodes which refer to the same group) the CSE
6861 is even wrong code. See PR56270. */
6864 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6867 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6868 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6871 /* VEC_NUM is the number of vect stmts to be created for this group. */
6874 grouped_load
= false;
6875 /* For SLP permutation support we need to load the whole group,
6876 not only the number of vector stmts the permutation result
6879 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6881 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6882 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6885 vec_num
= group_size
;
6891 group_size
= vec_num
= 1;
6895 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6896 gcc_assert (alignment_support_scheme
);
6897 /* Targets with load-lane instructions must not require explicit
6899 gcc_assert (!load_lanes_p
6900 || alignment_support_scheme
== dr_aligned
6901 || alignment_support_scheme
== dr_unaligned_supported
);
6903 /* In case the vectorization factor (VF) is bigger than the number
6904 of elements that we can fit in a vectype (nunits), we have to generate
6905 more than one vector stmt - i.e - we need to "unroll" the
6906 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6907 from one copy of the vector stmt to the next, in the field
6908 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6909 stages to find the correct vector defs to be used when vectorizing
6910 stmts that use the defs of the current stmt. The example below
6911 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6912 need to create 4 vectorized stmts):
6914 before vectorization:
6915 RELATED_STMT VEC_STMT
6919 step 1: vectorize stmt S1:
6920 We first create the vector stmt VS1_0, and, as usual, record a
6921 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6922 Next, we create the vector stmt VS1_1, and record a pointer to
6923 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6924 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6926 RELATED_STMT VEC_STMT
6927 VS1_0: vx0 = memref0 VS1_1 -
6928 VS1_1: vx1 = memref1 VS1_2 -
6929 VS1_2: vx2 = memref2 VS1_3 -
6930 VS1_3: vx3 = memref3 - -
6931 S1: x = load - VS1_0
6934 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6935 information we recorded in RELATED_STMT field is used to vectorize
6938 /* In case of interleaving (non-unit grouped access):
6945 Vectorized loads are created in the order of memory accesses
6946 starting from the access of the first stmt of the chain:
6949 VS2: vx1 = &base + vec_size*1
6950 VS3: vx3 = &base + vec_size*2
6951 VS4: vx4 = &base + vec_size*3
6953 Then permutation statements are generated:
6955 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6956 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6959 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6960 (the order of the data-refs in the output of vect_permute_load_chain
6961 corresponds to the order of scalar stmts in the interleaving chain - see
6962 the documentation of vect_permute_load_chain()).
6963 The generation of permutation stmts and recording them in
6964 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6966 In case of both multiple types and interleaving, the vector loads and
6967 permutation stmts above are created for every copy. The result vector
6968 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6969 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6971 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6972 on a target that supports unaligned accesses (dr_unaligned_supported)
6973 we generate the following code:
6977 p = p + indx * vectype_size;
6982 Otherwise, the data reference is potentially unaligned on a target that
6983 does not support unaligned accesses (dr_explicit_realign_optimized) -
6984 then generate the following code, in which the data in each iteration is
6985 obtained by two vector loads, one from the previous iteration, and one
6986 from the current iteration:
6988 msq_init = *(floor(p1))
6989 p2 = initial_addr + VS - 1;
6990 realignment_token = call target_builtin;
6993 p2 = p2 + indx * vectype_size
6995 vec_dest = realign_load (msq, lsq, realignment_token)
7000 /* If the misalignment remains the same throughout the execution of the
7001 loop, we can create the init_addr and permutation mask at the loop
7002 preheader. Otherwise, it needs to be created inside the loop.
7003 This can only occur when vectorizing memory accesses in the inner-loop
7004 nested within an outer-loop that is being vectorized. */
7006 if (nested_in_vect_loop
7007 && (TREE_INT_CST_LOW (DR_STEP (dr
))
7008 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
7010 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
7011 compute_in_loop
= true;
7014 if ((alignment_support_scheme
== dr_explicit_realign_optimized
7015 || alignment_support_scheme
== dr_explicit_realign
)
7016 && !compute_in_loop
)
7018 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
7019 alignment_support_scheme
, NULL_TREE
,
7021 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7023 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
7024 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
7032 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7035 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7037 aggr_type
= vectype
;
7039 prev_stmt_info
= NULL
;
7040 for (j
= 0; j
< ncopies
; j
++)
7042 /* 1. Create the vector or array pointer update chain. */
7045 bool simd_lane_access_p
7046 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
7047 if (simd_lane_access_p
7048 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
7049 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
7050 && integer_zerop (DR_OFFSET (first_dr
))
7051 && integer_zerop (DR_INIT (first_dr
))
7052 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7053 get_alias_set (DR_REF (first_dr
)))
7054 && (alignment_support_scheme
== dr_aligned
7055 || alignment_support_scheme
== dr_unaligned_supported
))
7057 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
7058 dataref_offset
= build_int_cst (reference_alias_ptr_type
7059 (DR_REF (first_dr
)), 0);
7062 else if (first_stmt_for_drptr
7063 && first_stmt
!= first_stmt_for_drptr
)
7066 = vect_create_data_ref_ptr (first_stmt_for_drptr
, aggr_type
,
7067 at_loop
, offset
, &dummy
, gsi
,
7068 &ptr_incr
, simd_lane_access_p
,
7069 &inv_p
, byte_offset
);
7070 /* Adjust the pointer by the difference to first_stmt. */
7071 data_reference_p ptrdr
7072 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr
));
7073 tree diff
= fold_convert (sizetype
,
7074 size_binop (MINUS_EXPR
,
7077 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7082 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
7083 offset
, &dummy
, gsi
, &ptr_incr
,
7084 simd_lane_access_p
, &inv_p
,
7087 else if (dataref_offset
)
7088 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
7089 TYPE_SIZE_UNIT (aggr_type
));
7091 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
7092 TYPE_SIZE_UNIT (aggr_type
));
7094 if (grouped_load
|| slp_perm
)
7095 dr_chain
.create (vec_num
);
7101 vec_array
= create_vector_array (vectype
, vec_num
);
7104 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7105 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
7106 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
7107 gimple_call_set_lhs (new_stmt
, vec_array
);
7108 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7110 /* Extract each vector into an SSA_NAME. */
7111 for (i
= 0; i
< vec_num
; i
++)
7113 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
7115 dr_chain
.quick_push (new_temp
);
7118 /* Record the mapping between SSA_NAMEs and statements. */
7119 vect_record_grouped_load_vectors (stmt
, dr_chain
);
7123 for (i
= 0; i
< vec_num
; i
++)
7126 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7129 /* 2. Create the vector-load in the loop. */
7130 switch (alignment_support_scheme
)
7133 case dr_unaligned_supported
:
7135 unsigned int align
, misalign
;
7138 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
7141 : build_int_cst (reference_alias_ptr_type
7142 (DR_REF (first_dr
)), 0));
7143 align
= TYPE_ALIGN_UNIT (vectype
);
7144 if (alignment_support_scheme
== dr_aligned
)
7146 gcc_assert (aligned_access_p (first_dr
));
7149 else if (DR_MISALIGNMENT (first_dr
) == -1)
7151 if (DR_VECT_AUX (first_dr
)->base_element_aligned
)
7152 align
= TYPE_ALIGN_UNIT (elem_type
);
7154 align
= (get_object_alignment (DR_REF (first_dr
))
7157 TREE_TYPE (data_ref
)
7158 = build_aligned_type (TREE_TYPE (data_ref
),
7159 align
* BITS_PER_UNIT
);
7163 TREE_TYPE (data_ref
)
7164 = build_aligned_type (TREE_TYPE (data_ref
),
7165 TYPE_ALIGN (elem_type
));
7166 misalign
= DR_MISALIGNMENT (first_dr
);
7168 if (dataref_offset
== NULL_TREE
7169 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7170 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
7174 case dr_explicit_realign
:
7178 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
7180 if (compute_in_loop
)
7181 msq
= vect_setup_realignment (first_stmt
, gsi
,
7183 dr_explicit_realign
,
7186 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7187 ptr
= copy_ssa_name (dataref_ptr
);
7189 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7190 new_stmt
= gimple_build_assign
7191 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
7193 (TREE_TYPE (dataref_ptr
),
7194 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7195 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7197 = build2 (MEM_REF
, vectype
, ptr
,
7198 build_int_cst (reference_alias_ptr_type
7199 (DR_REF (first_dr
)), 0));
7200 vec_dest
= vect_create_destination_var (scalar_dest
,
7202 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7203 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7204 gimple_assign_set_lhs (new_stmt
, new_temp
);
7205 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
7206 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
7207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7210 bump
= size_binop (MULT_EXPR
, vs
,
7211 TYPE_SIZE_UNIT (elem_type
));
7212 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
7213 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
7214 new_stmt
= gimple_build_assign
7215 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
7218 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7219 ptr
= copy_ssa_name (ptr
, new_stmt
);
7220 gimple_assign_set_lhs (new_stmt
, ptr
);
7221 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7223 = build2 (MEM_REF
, vectype
, ptr
,
7224 build_int_cst (reference_alias_ptr_type
7225 (DR_REF (first_dr
)), 0));
7228 case dr_explicit_realign_optimized
:
7229 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
7230 new_temp
= copy_ssa_name (dataref_ptr
);
7232 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
7233 new_stmt
= gimple_build_assign
7234 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
7236 (TREE_TYPE (dataref_ptr
),
7237 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
7238 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7240 = build2 (MEM_REF
, vectype
, new_temp
,
7241 build_int_cst (reference_alias_ptr_type
7242 (DR_REF (first_dr
)), 0));
7247 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7248 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
7249 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7250 gimple_assign_set_lhs (new_stmt
, new_temp
);
7251 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7253 /* 3. Handle explicit realignment if necessary/supported.
7255 vec_dest = realign_load (msq, lsq, realignment_token) */
7256 if (alignment_support_scheme
== dr_explicit_realign_optimized
7257 || alignment_support_scheme
== dr_explicit_realign
)
7259 lsq
= gimple_assign_lhs (new_stmt
);
7260 if (!realignment_token
)
7261 realignment_token
= dataref_ptr
;
7262 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7263 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
7264 msq
, lsq
, realignment_token
);
7265 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7266 gimple_assign_set_lhs (new_stmt
, new_temp
);
7267 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7269 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
7272 if (i
== vec_num
- 1 && j
== ncopies
- 1)
7273 add_phi_arg (phi
, lsq
,
7274 loop_latch_edge (containing_loop
),
7280 /* 4. Handle invariant-load. */
7281 if (inv_p
&& !bb_vinfo
)
7283 gcc_assert (!grouped_load
);
7284 /* If we have versioned for aliasing or the loop doesn't
7285 have any data dependencies that would preclude this,
7286 then we are sure this is a loop invariant load and
7287 thus we can insert it on the preheader edge. */
7288 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7289 && !nested_in_vect_loop
7290 && hoist_defs_of_uses (stmt
, loop
))
7292 if (dump_enabled_p ())
7294 dump_printf_loc (MSG_NOTE
, vect_location
,
7295 "hoisting out of the vectorized "
7297 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7299 tree tem
= copy_ssa_name (scalar_dest
);
7300 gsi_insert_on_edge_immediate
7301 (loop_preheader_edge (loop
),
7302 gimple_build_assign (tem
,
7304 (gimple_assign_rhs1 (stmt
))));
7305 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
7306 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7307 set_vinfo_for_stmt (new_stmt
,
7308 new_stmt_vec_info (new_stmt
, vinfo
));
7312 gimple_stmt_iterator gsi2
= *gsi
;
7314 new_temp
= vect_init_vector (stmt
, scalar_dest
,
7316 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7322 tree perm_mask
= perm_mask_for_reverse (vectype
);
7323 new_temp
= permute_vec_elements (new_temp
, new_temp
,
7324 perm_mask
, stmt
, gsi
);
7325 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7328 /* Collect vector loads and later create their permutation in
7329 vect_transform_grouped_load (). */
7330 if (grouped_load
|| slp_perm
)
7331 dr_chain
.quick_push (new_temp
);
7333 /* Store vector loads in the corresponding SLP_NODE. */
7334 if (slp
&& !slp_perm
)
7335 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7337 /* Bump the vector pointer to account for a gap or for excess
7338 elements loaded for a permuted SLP load. */
7339 if (group_gap_adj
!= 0)
7343 = wide_int_to_tree (sizetype
,
7344 wi::smul (TYPE_SIZE_UNIT (elem_type
),
7345 group_gap_adj
, &ovf
));
7346 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7351 if (slp
&& !slp_perm
)
7356 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7357 slp_node_instance
, false))
7359 dr_chain
.release ();
7368 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
7369 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7374 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7376 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7377 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7380 dr_chain
.release ();
7386 /* Function vect_is_simple_cond.
7389 LOOP - the loop that is being vectorized.
7390 COND - Condition that is checked for simple use.
7393 *COMP_VECTYPE - the vector type for the comparison.
7395 Returns whether a COND can be vectorized. Checks whether
7396 condition operands are supportable using vec_is_simple_use. */
7399 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, tree
*comp_vectype
)
7402 enum vect_def_type dt
;
7403 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7406 if (TREE_CODE (cond
) == SSA_NAME
7407 && TREE_CODE (TREE_TYPE (cond
)) == BOOLEAN_TYPE
)
7409 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (cond
);
7410 if (!vect_is_simple_use (cond
, vinfo
, &lhs_def_stmt
,
7413 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
7418 if (!COMPARISON_CLASS_P (cond
))
7421 lhs
= TREE_OPERAND (cond
, 0);
7422 rhs
= TREE_OPERAND (cond
, 1);
7424 if (TREE_CODE (lhs
) == SSA_NAME
)
7426 gimple
*lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
7427 if (!vect_is_simple_use (lhs
, vinfo
, &lhs_def_stmt
, &dt
, &vectype1
))
7430 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
7431 && TREE_CODE (lhs
) != FIXED_CST
)
7434 if (TREE_CODE (rhs
) == SSA_NAME
)
7436 gimple
*rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
7437 if (!vect_is_simple_use (rhs
, vinfo
, &rhs_def_stmt
, &dt
, &vectype2
))
7440 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
7441 && TREE_CODE (rhs
) != FIXED_CST
)
7444 *comp_vectype
= vectype1
? vectype1
: vectype2
;
7448 /* vectorizable_condition.
7450 Check if STMT is conditional modify expression that can be vectorized.
7451 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7452 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7455 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7456 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7457 else clause if it is 2).
7459 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* vectorizable_condition:
   Check whether STMT is a COND_EXPR assignment that can be vectorized
   and, when VEC_STMT is non-null (transform phase), emit the vectorized
   VEC_COND_EXPR statements at GSI.  REDUC_DEF/REDUC_INDEX handle the
   nested-reduction case (reduc_def replaces the then/else clause).
   NOTE(review): lossy extraction of GCC's tree-vect-stmts.c -- the
   leading numbers are the original line numbers and several lines
   (returns, braces, else arms) were dropped.  Comments only added.  */
7462 vectorizable_condition (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7463 gimple
**vec_stmt
, tree reduc_def
, int reduc_index
,
7466 tree scalar_dest
= NULL_TREE
;
7467 tree vec_dest
= NULL_TREE
;
7468 tree cond_expr
, then_clause
, else_clause
;
7469 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7470 tree comp_vectype
= NULL_TREE
;
7471 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7472 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7473 tree vec_compare
, vec_cond_expr
;
7475 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* dts[] remembers the def kind of each of the four operands so later
   copies can be derived with vect_get_vec_def_for_stmt_copy.  */
7476 enum vect_def_type dt
, dts
[4];
7478 enum tree_code code
;
7479 stmt_vec_info prev_stmt_info
= NULL
;
7481 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7482 vec
<tree
> vec_oprnds0
= vNULL
;
7483 vec
<tree
> vec_oprnds1
= vNULL
;
7484 vec
<tree
> vec_oprnds2
= vNULL
;
7485 vec
<tree
> vec_oprnds3
= vNULL
;
7487 bool masked
= false;
/* Early bail-outs: reduction + SLP combination, non-TREE_CODE
   reductions, statements that are neither relevant nor in a BB
   vectorization context, and unsupported def types.  */
7489 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7492 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
) == TREE_CODE_REDUCTION
)
7494 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7497 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7498 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7502 /* FORNOW: not yet supported. */
7503 if (STMT_VINFO_LIVE_P (stmt_info
))
7505 if (dump_enabled_p ())
7506 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7507 "value used after loop.\n");
7512 /* Is vectorizable conditional operation? */
7513 if (!is_gimple_assign (stmt
))
7516 code
= gimple_assign_rhs_code (stmt
);
7518 if (code
!= COND_EXPR
)
7521 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7522 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* ncopies: number of vector statements needed per scalar statement
   (SLP path presumably sets it differently -- dropped lines).  */
7524 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7527 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7529 gcc_assert (ncopies
>= 1);
7530 if (reduc_index
&& ncopies
> 1)
7531 return false; /* FORNOW */
/* Split the COND_EXPR into its three operands.  */
7533 cond_expr
= gimple_assign_rhs1 (stmt
);
7534 then_clause
= gimple_assign_rhs2 (stmt
);
7535 else_clause
= gimple_assign_rhs3 (stmt
);
7537 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
, &comp_vectype
)
7542 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
))
7544 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &def_stmt
, &dt
))
/* Pick the mask (truth) vector type for the comparison: reuse a
   vector-boolean comp_vectype directly, otherwise derive a
   same-sized truth vector type.  */
7547 if (VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
7549 vec_cmp_type
= comp_vectype
;
7553 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
7554 if (vec_cmp_type
== NULL_TREE
)
/* Analysis phase ends here: record the stmt kind and ask the target
   whether it can expand a VEC_COND_EXPR of these types.  */
7559 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7560 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* ---- Transformation phase. ---- */
7567 vec_oprnds0
.create (1);
7568 vec_oprnds1
.create (1);
7569 vec_oprnds2
.create (1);
7570 vec_oprnds3
.create (1);
7574 scalar_dest
= gimple_assign_lhs (stmt
);
7575 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7577 /* Handle cond expr. */
/* One iteration per vector copy; j == 0 fetches the initial vector
   defs (SLP or loop-based), later iterations derive the next copy of
   each operand from dts[].  */
7578 for (j
= 0; j
< ncopies
; j
++)
7580 gassign
*new_stmt
= NULL
;
7585 auto_vec
<tree
, 4> ops
;
7586 auto_vec
<vec
<tree
>, 4> vec_defs
;
/* SLP path: collect the scalar operands (masked form pushes the
   whole cond_expr, otherwise its two comparison operands) and pop
   the per-operand vector defs in reverse push order.  */
7589 ops
.safe_push (cond_expr
);
7592 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7593 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7595 ops
.safe_push (then_clause
);
7596 ops
.safe_push (else_clause
);
7597 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7598 vec_oprnds3
= vec_defs
.pop ();
7599 vec_oprnds2
= vec_defs
.pop ();
7601 vec_oprnds1
= vec_defs
.pop ();
7602 vec_oprnds0
= vec_defs
.pop ();
7605 vec_defs
.release ();
/* Non-SLP, first copy: create vector defs for the condition
   operand(s) and record their def kinds in dts[0]/dts[1].  */
7613 = vect_get_vec_def_for_operand (cond_expr
, stmt
,
7615 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
,
7621 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7622 stmt
, comp_vectype
);
7623 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0),
7624 loop_vinfo
, &gtemp
, &dts
[0]);
7627 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7628 stmt
, comp_vectype
);
7629 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1),
7630 loop_vinfo
, &gtemp
, &dts
[1]);
/* Then/else clauses: in a nested reduction the reduction def
   replaces the clause selected by reduc_index.  */
7632 if (reduc_index
== 1)
7633 vec_then_clause
= reduc_def
;
7636 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7638 vect_is_simple_use (then_clause
, loop_vinfo
,
7641 if (reduc_index
== 2)
7642 vec_else_clause
= reduc_def
;
7645 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7647 vect_is_simple_use (else_clause
, loop_vinfo
, &gtemp
, &dts
[3]);
/* j > 0: derive this copy's operand defs from the previous copy
   using the remembered def kinds.  */
7654 = vect_get_vec_def_for_stmt_copy (dts
[0],
7655 vec_oprnds0
.pop ());
7658 = vect_get_vec_def_for_stmt_copy (dts
[1],
7659 vec_oprnds1
.pop ());
7661 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7662 vec_oprnds2
.pop ());
7663 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7664 vec_oprnds3
.pop ());
7669 vec_oprnds0
.quick_push (vec_cond_lhs
);
7671 vec_oprnds1
.quick_push (vec_cond_rhs
);
7672 vec_oprnds2
.quick_push (vec_then_clause
);
7673 vec_oprnds3
.quick_push (vec_else_clause
);
7676 /* Arguments are ready. Create the new vector stmt. */
7677 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7679 vec_then_clause
= vec_oprnds2
[i
];
7680 vec_else_clause
= vec_oprnds3
[i
];
/* Masked form uses the mask vector directly as the compare;
   otherwise rebuild the scalar comparison on the vector operands.  */
7683 vec_compare
= vec_cond_lhs
;
7686 vec_cond_rhs
= vec_oprnds1
[i
];
7687 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7688 vec_cond_lhs
, vec_cond_rhs
);
7690 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7691 vec_compare
, vec_then_clause
, vec_else_clause
);
7693 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7694 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7695 gimple_assign_set_lhs (new_stmt
, new_temp
);
7696 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7698 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: the first copy goes into STMT_VINFO_VEC_STMT,
   later copies are linked through STMT_VINFO_RELATED_STMT.  */
7705 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7707 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7709 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7712 vec_oprnds0
.release ();
7713 vec_oprnds1
.release ();
7714 vec_oprnds2
.release ();
7715 vec_oprnds3
.release ();
7720 /* vectorizable_comparison.
7722 Check if STMT is comparison expression that can be vectorized.
7723 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7724 comparison, put it in VEC_STMT, and insert it at GSI.
7726 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* vectorizable_comparison:
   Check whether STMT is a scalar comparison producing a boolean that
   can be vectorized into a vector-mask comparison, and (when VEC_STMT
   is non-null) emit the vectorized comparison at GSI.
   NOTE(review): lossy extraction of GCC's tree-vect-stmts.c -- leading
   numbers are original line numbers; some lines were dropped.
   Comments only added; code tokens untouched.  */
7729 vectorizable_comparison (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
7730 gimple
**vec_stmt
, tree reduc_def
,
7733 tree lhs
, rhs1
, rhs2
;
7734 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7735 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
7736 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7737 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
7739 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* dts[] records each operand's def kind for generating later copies.  */
7740 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
7743 enum tree_code code
;
7744 stmt_vec_info prev_stmt_info
= NULL
;
7746 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7747 vec
<tree
> vec_oprnds0
= vNULL
;
7748 vec
<tree
> vec_oprnds1
= vNULL
;
/* Early bail-outs: irrelevant stmts outside BB vectorization, and a
   result vectype that is not a vector-boolean (mask) type.  */
7753 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7756 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
7759 mask_type
= vectype
;
7760 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* ncopies: vector statements per scalar statement (SLP path
   presumably differs -- dropped lines).  */
7762 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7765 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7767 gcc_assert (ncopies
>= 1);
7768 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7769 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7773 if (STMT_VINFO_LIVE_P (stmt_info
))
7775 if (dump_enabled_p ())
7776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7777 "value used after loop.\n");
/* The stmt must be an assignment whose rhs code is a comparison.  */
7781 if (!is_gimple_assign (stmt
))
7784 code
= gimple_assign_rhs_code (stmt
);
7786 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
7789 rhs1
= gimple_assign_rhs1 (stmt
);
7790 rhs2
= gimple_assign_rhs2 (stmt
);
/* Both operands must be simple uses; their vector types (if any)
   must agree in number of elements.  */
7792 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &def_stmt
,
7793 &dts
[0], &vectype1
))
7796 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &def_stmt
,
7797 &dts
[1], &vectype2
))
7800 if (vectype1
&& vectype2
7801 && TYPE_VECTOR_SUBPARTS (vectype1
) != TYPE_VECTOR_SUBPARTS (vectype2
))
7804 vectype
= vectype1
? vectype1
: vectype2
;
7806 /* Invariant comparison. */
/* Neither operand had a vector type: build one from the scalar type
   and check it matches the current vector size / element count.  */
7809 vectype
= build_vector_type (TREE_TYPE (rhs1
), nunits
);
7810 if (tree_to_shwi (TYPE_SIZE_UNIT (vectype
)) != current_vector_size
)
7813 else if (nunits
!= TYPE_VECTOR_SUBPARTS (vectype
))
/* Analysis phase ends: record stmt kind, model the cost, and ask
   the target whether it can expand this vector comparison.  */
7818 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
7819 vect_model_simple_cost (stmt_info
, ncopies
, dts
, NULL
, NULL
);
7820 return expand_vec_cmp_expr_p (vectype
, mask_type
);
/* ---- Transformation phase. ---- */
7826 vec_oprnds0
.create (1);
7827 vec_oprnds1
.create (1);
7831 lhs
= gimple_assign_lhs (stmt
);
7832 mask
= vect_create_destination_var (lhs
, mask_type
);
7834 /* Handle cmp expr. */
/* One iteration per vector copy; j == 0 fetches initial defs (SLP or
   loop-based), later iterations derive the next copy from dts[].  */
7835 for (j
= 0; j
< ncopies
; j
++)
7837 gassign
*new_stmt
= NULL
;
7842 auto_vec
<tree
, 2> ops
;
7843 auto_vec
<vec
<tree
>, 2> vec_defs
;
7845 ops
.safe_push (rhs1
);
7846 ops
.safe_push (rhs2
);
7847 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7848 vec_oprnds1
= vec_defs
.pop ();
7849 vec_oprnds0
= vec_defs
.pop ();
7853 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt
, vectype
);
7854 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt
, vectype
);
7859 vec_rhs1
= vect_get_vec_def_for_stmt_copy (dts
[0],
7860 vec_oprnds0
.pop ());
7861 vec_rhs2
= vect_get_vec_def_for_stmt_copy (dts
[1],
7862 vec_oprnds1
.pop ());
7867 vec_oprnds0
.quick_push (vec_rhs1
);
7868 vec_oprnds1
.quick_push (vec_rhs2
);
7871 /* Arguments are ready. Create the new vector stmt. */
7872 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
7874 vec_rhs2
= vec_oprnds1
[i
];
7876 new_temp
= make_ssa_name (mask
);
7877 new_stmt
= gimple_build_assign (new_temp
, code
, vec_rhs1
, vec_rhs2
);
7878 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7880 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: first copy into STMT_VINFO_VEC_STMT, later
   copies linked through STMT_VINFO_RELATED_STMT.  */
7887 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7889 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7891 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7894 vec_oprnds0
.release ();
7895 vec_oprnds1
.release ();
7900 /* Make sure the statement is vectorizable. */
/* vect_analyze_stmt:
   Decide whether STMT is vectorizable: dispatch to the per-kind
   vectorizable_* analyzers (with a NULL vec_stmt, i.e. analysis only),
   recursing into pattern statements and their def sequences.  Sets
   *NEED_TO_VECTORIZE when a relevant stmt is found.  NODE is the SLP
   node, or NULL for loop-based analysis.
   NOTE(review): lossy extraction of GCC's tree-vect-stmts.c -- leading
   numbers are original line numbers; some lines (returns, braces) were
   dropped.  Comments only added; code tokens untouched.  */
7903 vect_analyze_stmt (gimple
*stmt
, bool *need_to_vectorize
, slp_tree node
)
7905 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7906 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7907 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7909 tree scalar_type
, vectype
;
7910 gimple
*pattern_stmt
;
7911 gimple_seq pattern_def_seq
;
7913 if (dump_enabled_p ())
7915 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7916 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
/* Volatile operands are never vectorized.  */
7919 if (gimple_has_volatile_ops (stmt
))
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7923 "not vectorized: stmt has volatile operands\n");
7928 /* Skip stmts that do not need to be vectorized. In loops this is expected
7930 - the COND_EXPR which is the loop exit condition
7931 - any LABEL_EXPRs in the loop
7932 - computations that are used only for array indexing or loop control.
7933 In basic blocks we only analyze statements that are a part of some SLP
7934 instance, therefore, all the statements are relevant.
7936 Pattern statement needs to be analyzed instead of the original statement
7937 if the original statement is not relevant. Otherwise, we analyze both
7938 statements. In basic blocks we are called from some SLP instance
7939 traversal, don't analyze pattern stmts instead, the pattern stmts
7940 already will be part of SLP instance. */
7942 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7943 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7944 && !STMT_VINFO_LIVE_P (stmt_info
))
7946 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7948 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7949 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7951 /* Analyze PATTERN_STMT instead of the original stmt. */
7952 stmt
= pattern_stmt
;
7953 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7954 if (dump_enabled_p ())
7956 dump_printf_loc (MSG_NOTE
, vect_location
,
7957 "==> examining pattern statement: ");
7958 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7963 if (dump_enabled_p ())
7964 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
/* STMT itself is relevant AND has a relevant/live pattern stmt:
   analyze the pattern stmt too (recursively), then fall through to
   analyzing STMT below.  */
7969 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7972 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7973 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7975 /* Analyze PATTERN_STMT too. */
7976 if (dump_enabled_p ())
7978 dump_printf_loc (MSG_NOTE
, vect_location
,
7979 "==> examining pattern statement: ");
7980 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7983 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
/* A pattern stmt may carry a sequence of auxiliary def stmts; each
   relevant/live one is analyzed recursively as well.  */
7987 if (is_pattern_stmt_p (stmt_info
)
7989 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7991 gimple_stmt_iterator si
;
7993 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7995 gimple
*pattern_def_stmt
= gsi_stmt (si
);
7996 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7997 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7999 /* Analyze def stmt of STMT if it's a pattern stmt. */
8000 if (dump_enabled_p ())
8002 dump_printf_loc (MSG_NOTE
, vect_location
,
8003 "==> examining pattern def statement: ");
8004 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
8007 if (!vect_analyze_stmt (pattern_def_stmt
,
8008 need_to_vectorize
, node
))
/* Sanity-check the def type against the recorded relevance.
   (Fall-through structure here is obscured by the dropped lines.)  */
8014 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
8016 case vect_internal_def
:
8019 case vect_reduction_def
:
8020 case vect_nested_cycle
:
8021 gcc_assert (!bb_vinfo
8022 && (relevance
== vect_used_in_outer
8023 || relevance
== vect_used_in_outer_by_reduction
8024 || relevance
== vect_used_by_reduction
8025 || relevance
== vect_unused_in_scope
));
8028 case vect_induction_def
:
8029 case vect_constant_def
:
8030 case vect_external_def
:
8031 case vect_unknown_def_type
:
/* BB-vectorization path: compute the vectype from the lhs's scalar
   type (loop analysis computed it earlier).  */
8038 gcc_assert (PURE_SLP_STMT (stmt_info
));
8040 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
8041 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_NOTE
, vect_location
,
8044 "get vectype for scalar type: ");
8045 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
8046 dump_printf (MSG_NOTE
, "\n");
8049 vectype
= get_vectype_for_scalar_type (scalar_type
);
8052 if (dump_enabled_p ())
8054 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8055 "not SLPed: unsupported data-type ");
8056 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
8058 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
8063 if (dump_enabled_p ())
8065 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
8066 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
8067 dump_printf (MSG_NOTE
, "\n");
8070 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
8073 if (STMT_VINFO_RELEVANT_P (stmt_info
))
8075 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
8076 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
8077 || (is_gimple_call (stmt
)
8078 && gimple_call_lhs (stmt
) == NULL_TREE
));
8079 *need_to_vectorize
= true;
/* Pure-SLP stmts reached outside an SLP traversal are deferred to
   SLP analysis.  */
8082 if (PURE_SLP_STMT (stmt_info
) && !node
)
8084 dump_printf_loc (MSG_NOTE
, vect_location
,
8085 "handled only by SLP analysis\n");
/* Try every vectorizable_* analyzer in turn (analysis-only calls:
   vec_stmt == NULL).  Two nearly identical chains exist -- presumably
   the loop vs. basic-block paths; the selecting condition was dropped
   by the extraction.  */
8091 && (STMT_VINFO_RELEVANT_P (stmt_info
)
8092 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
8093 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8094 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8095 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8096 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8097 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8098 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8099 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8100 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8101 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
8102 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8103 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8107 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
8108 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
8109 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
8110 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
8111 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
8112 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
8113 || vectorizable_call (stmt
, NULL
, NULL
, node
)
8114 || vectorizable_store (stmt
, NULL
, NULL
, node
)
8115 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
)
8116 || vectorizable_comparison (stmt
, NULL
, NULL
, NULL
, node
));
8121 if (dump_enabled_p ())
8123 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8124 "not vectorized: relevant stmt not ");
8125 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8126 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8135 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8136 need extra handling, except for vectorizable reductions. */
8137 if (STMT_VINFO_LIVE_P (stmt_info
)
8138 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8139 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
8143 if (dump_enabled_p ())
8145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8146 "not vectorized: live stmt not ");
8147 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
8148 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
8158 /* Function vect_transform_stmt.
8160 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8163 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8164 bool *grouped_store
, slp_tree slp_node
,
8165 slp_instance slp_node_instance
)
8167 bool is_store
= false;
8168 gimple
*vec_stmt
= NULL
;
8169 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8172 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8174 switch (STMT_VINFO_TYPE (stmt_info
))
8176 case type_demotion_vec_info_type
:
8177 case type_promotion_vec_info_type
:
8178 case type_conversion_vec_info_type
:
8179 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8183 case induc_vec_info_type
:
8184 gcc_assert (!slp_node
);
8185 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
8189 case shift_vec_info_type
:
8190 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
8194 case op_vec_info_type
:
8195 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
8199 case assignment_vec_info_type
:
8200 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
8204 case load_vec_info_type
:
8205 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
8210 case store_vec_info_type
:
8211 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
8213 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
8215 /* In case of interleaving, the whole chain is vectorized when the
8216 last store in the chain is reached. Store stmts before the last
8217 one are skipped, and there vec_stmt_info shouldn't be freed
8219 *grouped_store
= true;
8220 if (STMT_VINFO_VEC_STMT (stmt_info
))
8227 case condition_vec_info_type
:
8228 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
8232 case comparison_vec_info_type
:
8233 done
= vectorizable_comparison (stmt
, gsi
, &vec_stmt
, NULL
, slp_node
);
8237 case call_vec_info_type
:
8238 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8239 stmt
= gsi_stmt (*gsi
);
8240 if (is_gimple_call (stmt
)
8241 && gimple_call_internal_p (stmt
)
8242 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
8246 case call_simd_clone_vec_info_type
:
8247 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
8248 stmt
= gsi_stmt (*gsi
);
8251 case reduc_vec_info_type
:
8252 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
8257 if (!STMT_VINFO_LIVE_P (stmt_info
))
8259 if (dump_enabled_p ())
8260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8261 "stmt not supported.\n");
8266 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8267 This would break hybrid SLP vectorization. */
8269 gcc_assert (!vec_stmt
8270 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
8272 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8273 is being vectorized, but outside the immediately enclosing loop. */
8275 && STMT_VINFO_LOOP_VINFO (stmt_info
)
8276 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8277 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
8278 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
8279 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
8280 || STMT_VINFO_RELEVANT (stmt_info
) ==
8281 vect_used_in_outer_by_reduction
))
8283 struct loop
*innerloop
= LOOP_VINFO_LOOP (
8284 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
8285 imm_use_iterator imm_iter
;
8286 use_operand_p use_p
;
8290 if (dump_enabled_p ())
8291 dump_printf_loc (MSG_NOTE
, vect_location
,
8292 "Record the vdef for outer-loop vectorization.\n");
8294 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8295 (to be used when vectorizing outer-loop stmts that use the DEF of
8297 if (gimple_code (stmt
) == GIMPLE_PHI
)
8298 scalar_dest
= PHI_RESULT (stmt
);
8300 scalar_dest
= gimple_assign_lhs (stmt
);
8302 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
8304 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
8306 exit_phi
= USE_STMT (use_p
);
8307 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
8312 /* Handle stmts whose DEF is used outside the loop-nest that is
8313 being vectorized. */
8314 if (STMT_VINFO_LIVE_P (stmt_info
)
8315 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
8317 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
8322 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
8328 /* Remove a group of stores (for SLP or interleaving), free their
8332 vect_remove_stores (gimple
*first_stmt
)
8334 gimple
*next
= first_stmt
;
8336 gimple_stmt_iterator next_si
;
8340 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
8342 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
8343 if (is_pattern_stmt_p (stmt_info
))
8344 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
8345 /* Free the attached stmt_vec_info and remove the stmt. */
8346 next_si
= gsi_for_stmt (next
);
8347 unlink_stmt_vdef (next
);
8348 gsi_remove (&next_si
, true);
8349 release_defs (next
);
8350 free_stmt_vec_info (next
);
8356 /* Function new_stmt_vec_info.
8358 Create and initialize a new stmt_vec_info struct for STMT. */
8361 new_stmt_vec_info (gimple
*stmt
, vec_info
*vinfo
)
8364 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
8366 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
8367 STMT_VINFO_STMT (res
) = stmt
;
8369 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
8370 STMT_VINFO_LIVE_P (res
) = false;
8371 STMT_VINFO_VECTYPE (res
) = NULL
;
8372 STMT_VINFO_VEC_STMT (res
) = NULL
;
8373 STMT_VINFO_VECTORIZABLE (res
) = true;
8374 STMT_VINFO_IN_PATTERN_P (res
) = false;
8375 STMT_VINFO_RELATED_STMT (res
) = NULL
;
8376 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
8377 STMT_VINFO_DATA_REF (res
) = NULL
;
8378 STMT_VINFO_VEC_REDUCTION_TYPE (res
) = TREE_CODE_REDUCTION
;
8380 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
8381 STMT_VINFO_DR_OFFSET (res
) = NULL
;
8382 STMT_VINFO_DR_INIT (res
) = NULL
;
8383 STMT_VINFO_DR_STEP (res
) = NULL
;
8384 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
8386 if (gimple_code (stmt
) == GIMPLE_PHI
8387 && is_loop_header_bb_p (gimple_bb (stmt
)))
8388 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
8390 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
8392 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
8393 STMT_SLP_TYPE (res
) = loop_vect
;
8394 STMT_VINFO_NUM_SLP_USES (res
) = 0;
8396 GROUP_FIRST_ELEMENT (res
) = NULL
;
8397 GROUP_NEXT_ELEMENT (res
) = NULL
;
8398 GROUP_SIZE (res
) = 0;
8399 GROUP_STORE_COUNT (res
) = 0;
8400 GROUP_GAP (res
) = 0;
8401 GROUP_SAME_DR_STMT (res
) = NULL
;
8407 /* Create a hash table for stmt_vec_info. */
8410 init_stmt_vec_info_vec (void)
8412 gcc_assert (!stmt_vec_info_vec
.exists ());
8413 stmt_vec_info_vec
.create (50);
8417 /* Free hash table for stmt_vec_info. */
8420 free_stmt_vec_info_vec (void)
8424 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
8426 free_stmt_vec_info (STMT_VINFO_STMT (info
));
8427 gcc_assert (stmt_vec_info_vec
.exists ());
8428 stmt_vec_info_vec
.release ();
8432 /* Free stmt vectorization related info. */
8435 free_stmt_vec_info (gimple
*stmt
)
8437 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8442 /* Check if this statement has a related "pattern stmt"
8443 (introduced by the vectorizer during the pattern recognition
8444 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8446 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
8448 stmt_vec_info patt_info
8449 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8452 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
8453 gimple
*patt_stmt
= STMT_VINFO_STMT (patt_info
);
8454 gimple_set_bb (patt_stmt
, NULL
);
8455 tree lhs
= gimple_get_lhs (patt_stmt
);
8456 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8457 release_ssa_name (lhs
);
8460 gimple_stmt_iterator si
;
8461 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
8463 gimple
*seq_stmt
= gsi_stmt (si
);
8464 gimple_set_bb (seq_stmt
, NULL
);
8465 lhs
= gimple_get_lhs (seq_stmt
);
8466 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
8467 release_ssa_name (lhs
);
8468 free_stmt_vec_info (seq_stmt
);
8471 free_stmt_vec_info (patt_stmt
);
8475 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
8476 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
8477 set_vinfo_for_stmt (stmt
, NULL
);
8482 /* Function get_vectype_for_scalar_type_and_size.
8484 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8488 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
8490 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
8491 machine_mode simd_mode
;
8492 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
8499 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
8500 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
8503 /* For vector types of elements whose mode precision doesn't
8504 match their types precision we use a element type of mode
8505 precision. The vectorization routines will have to make sure
8506 they support the proper result truncation/extension.
8507 We also make sure to build vector types with INTEGER_TYPE
8508 component type only. */
8509 if (INTEGRAL_TYPE_P (scalar_type
)
8510 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
8511 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
8512 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
8513 TYPE_UNSIGNED (scalar_type
));
8515 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8516 When the component mode passes the above test simply use a type
8517 corresponding to that mode. The theory is that any use that
8518 would cause problems with this will disable vectorization anyway. */
8519 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
8520 && !INTEGRAL_TYPE_P (scalar_type
))
8521 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
8523 /* We can't build a vector type of elements with alignment bigger than
8525 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
8526 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
8527 TYPE_UNSIGNED (scalar_type
));
8529 /* If we felt back to using the mode fail if there was
8530 no scalar type for it. */
8531 if (scalar_type
== NULL_TREE
)
8534 /* If no size was supplied use the mode the target prefers. Otherwise
8535 lookup a vector mode of the specified size. */
8537 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
8539 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
8540 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
8544 vectype
= build_vector_type (scalar_type
, nunits
);
8546 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8547 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
8553 unsigned int current_vector_size
;
8555 /* Function get_vectype_for_scalar_type.
8557 Returns the vector type corresponding to SCALAR_TYPE as supported
8561 get_vectype_for_scalar_type (tree scalar_type
)
8564 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
8565 current_vector_size
);
8567 && current_vector_size
== 0)
8568 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
8572 /* Function get_mask_type_for_scalar_type.
8574 Returns the mask type corresponding to a result of comparison
8575 of vectors of specified SCALAR_TYPE as supported by target. */
8578 get_mask_type_for_scalar_type (tree scalar_type
)
8580 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
8585 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
8586 current_vector_size
);
8589 /* Function get_same_sized_vectype
8591 Returns a vector type corresponding to SCALAR_TYPE of size
8592 VECTOR_TYPE if supported by the target. */
8595 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
8597 if (TREE_CODE (scalar_type
) == BOOLEAN_TYPE
)
8598 return build_same_sized_truth_vector_type (vector_type
);
8600 return get_vectype_for_scalar_type_and_size
8601 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
8604 /* Function vect_is_simple_use.
8607 VINFO - the vect info of the loop or basic block that is being vectorized.
8608 OPERAND - operand in the loop or bb.
8610 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8611 DT - the type of definition
8613 Returns whether a stmt with OPERAND can be vectorized.
8614 For loops, supportable operands are constants, loop invariants, and operands
8615 that are defined by the current iteration of the loop. Unsupportable
8616 operands are those that are defined by a previous iteration of the loop (as
8617 is the case in reduction/induction computations).
8618 For basic blocks, supportable operands are constants and bb invariants.
8619 For now, operands defined outside the basic block are not supported. */
8622 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8623 gimple
**def_stmt
, enum vect_def_type
*dt
)
8626 *dt
= vect_unknown_def_type
;
8628 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE
, vect_location
,
8631 "vect_is_simple_use: operand ");
8632 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
8633 dump_printf (MSG_NOTE
, "\n");
8636 if (CONSTANT_CLASS_P (operand
))
8638 *dt
= vect_constant_def
;
8642 if (is_gimple_min_invariant (operand
))
8644 *dt
= vect_external_def
;
8648 if (TREE_CODE (operand
) != SSA_NAME
)
8650 if (dump_enabled_p ())
8651 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8656 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
8658 *dt
= vect_external_def
;
8662 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
8663 if (dump_enabled_p ())
8665 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8666 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8669 if (! vect_stmt_in_region_p (vinfo
, *def_stmt
))
8670 *dt
= vect_external_def
;
8673 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8674 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8677 if (dump_enabled_p ())
8679 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8682 case vect_uninitialized_def
:
8683 dump_printf (MSG_NOTE
, "uninitialized\n");
8685 case vect_constant_def
:
8686 dump_printf (MSG_NOTE
, "constant\n");
8688 case vect_external_def
:
8689 dump_printf (MSG_NOTE
, "external\n");
8691 case vect_internal_def
:
8692 dump_printf (MSG_NOTE
, "internal\n");
8694 case vect_induction_def
:
8695 dump_printf (MSG_NOTE
, "induction\n");
8697 case vect_reduction_def
:
8698 dump_printf (MSG_NOTE
, "reduction\n");
8700 case vect_double_reduction_def
:
8701 dump_printf (MSG_NOTE
, "double reduction\n");
8703 case vect_nested_cycle
:
8704 dump_printf (MSG_NOTE
, "nested cycle\n");
8706 case vect_unknown_def_type
:
8707 dump_printf (MSG_NOTE
, "unknown\n");
8712 if (*dt
== vect_unknown_def_type
)
8714 if (dump_enabled_p ())
8715 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8716 "Unsupported pattern.\n");
8720 switch (gimple_code (*def_stmt
))
8727 if (dump_enabled_p ())
8728 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8729 "unsupported defining stmt:\n");
8736 /* Function vect_is_simple_use.
8738 Same as vect_is_simple_use but also determines the vector operand
8739 type of OPERAND and stores it to *VECTYPE. If the definition of
8740 OPERAND is vect_uninitialized_def, vect_constant_def or
8741 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8742 is responsible to compute the best suited vector type for the
8746 vect_is_simple_use (tree operand
, vec_info
*vinfo
,
8747 gimple
**def_stmt
, enum vect_def_type
*dt
, tree
*vectype
)
8749 if (!vect_is_simple_use (operand
, vinfo
, def_stmt
, dt
))
8752 /* Now get a vector type if the def is internal, otherwise supply
8753 NULL_TREE and leave it up to the caller to figure out a proper
8754 type for the use stmt. */
8755 if (*dt
== vect_internal_def
8756 || *dt
== vect_induction_def
8757 || *dt
== vect_reduction_def
8758 || *dt
== vect_double_reduction_def
8759 || *dt
== vect_nested_cycle
)
8761 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8763 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8764 && !STMT_VINFO_RELEVANT (stmt_info
)
8765 && !STMT_VINFO_LIVE_P (stmt_info
))
8766 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8768 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8769 gcc_assert (*vectype
!= NULL_TREE
);
8771 else if (*dt
== vect_uninitialized_def
8772 || *dt
== vect_constant_def
8773 || *dt
== vect_external_def
)
8774 *vectype
= NULL_TREE
;
8782 /* Function supportable_widening_operation
8784 Check whether an operation represented by the code CODE is a
8785 widening operation that is supported by the target platform in
8786 vector form (i.e., when operating on arguments of type VECTYPE_IN
8787 producing a result of type VECTYPE_OUT).
8789 Widening operations we currently support are NOP (CONVERT), FLOAT
8790 and WIDEN_MULT. This function checks if these operations are supported
8791 by the target platform either directly (via vector tree-codes), or via
8795 - CODE1 and CODE2 are codes of vector operations to be used when
8796 vectorizing the operation, if available.
8797 - MULTI_STEP_CVT determines the number of required intermediate steps in
8798 case of multi-step conversion (like char->short->int - in that case
8799 MULTI_STEP_CVT will be 1).
8800 - INTERM_TYPES contains the intermediate type required to perform the
8801 widening operation (short in the above example). */
8804 supportable_widening_operation (enum tree_code code
, gimple
*stmt
,
8805 tree vectype_out
, tree vectype_in
,
8806 enum tree_code
*code1
, enum tree_code
*code2
,
8807 int *multi_step_cvt
,
8808 vec
<tree
> *interm_types
)
8810 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8811 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8812 struct loop
*vect_loop
= NULL
;
8813 machine_mode vec_mode
;
8814 enum insn_code icode1
, icode2
;
8815 optab optab1
, optab2
;
8816 tree vectype
= vectype_in
;
8817 tree wide_vectype
= vectype_out
;
8818 enum tree_code c1
, c2
;
8820 tree prev_type
, intermediate_type
;
8821 machine_mode intermediate_mode
, prev_mode
;
8822 optab optab3
, optab4
;
8824 *multi_step_cvt
= 0;
8826 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8830 case WIDEN_MULT_EXPR
:
8831 /* The result of a vectorized widening operation usually requires
8832 two vectors (because the widened results do not fit into one vector).
8833 The generated vector results would normally be expected to be
8834 generated in the same order as in the original scalar computation,
8835 i.e. if 8 results are generated in each vector iteration, they are
8836 to be organized as follows:
8837 vect1: [res1,res2,res3,res4],
8838 vect2: [res5,res6,res7,res8].
8840 However, in the special case that the result of the widening
8841 operation is used in a reduction computation only, the order doesn't
8842 matter (because when vectorizing a reduction we change the order of
8843 the computation). Some targets can take advantage of this and
8844 generate more efficient code. For example, targets like Altivec,
8845 that support widen_mult using a sequence of {mult_even,mult_odd}
8846 generate the following vectors:
8847 vect1: [res1,res3,res5,res7],
8848 vect2: [res2,res4,res6,res8].
8850 When vectorizing outer-loops, we execute the inner-loop sequentially
8851 (each vectorized inner-loop iteration contributes to VF outer-loop
8852 iterations in parallel). We therefore don't allow to change the
8853 order of the computation in the inner-loop during outer-loop
8855 /* TODO: Another case in which order doesn't *really* matter is when we
8856 widen and then contract again, e.g. (short)((int)x * y >> 8).
8857 Normally, pack_trunc performs an even/odd permute, whereas the
8858 repack from an even/odd expansion would be an interleave, which
8859 would be significantly simpler for e.g. AVX2. */
8860 /* In any case, in order to avoid duplicating the code below, recurse
8861 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8862 are properly set up for the caller. If we fail, we'll continue with
8863 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8865 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8866 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8867 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8868 stmt
, vectype_out
, vectype_in
,
8869 code1
, code2
, multi_step_cvt
,
8872 /* Elements in a vector with vect_used_by_reduction property cannot
8873 be reordered if the use chain with this property does not have the
8874 same operation. One such an example is s += a * b, where elements
8875 in a and b cannot be reordered. Here we check if the vector defined
8876 by STMT is only directly used in the reduction statement. */
8877 tree lhs
= gimple_assign_lhs (stmt
);
8878 use_operand_p dummy
;
8880 stmt_vec_info use_stmt_info
= NULL
;
8881 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8882 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8883 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8886 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8887 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8900 case VEC_WIDEN_MULT_EVEN_EXPR
:
8901 /* Support the recursion induced just above. */
8902 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8903 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8906 case WIDEN_LSHIFT_EXPR
:
8907 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8908 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8912 c1
= VEC_UNPACK_LO_EXPR
;
8913 c2
= VEC_UNPACK_HI_EXPR
;
8917 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8918 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8921 case FIX_TRUNC_EXPR
:
8922 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8923 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8924 computing the operation. */
8931 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8934 if (code
== FIX_TRUNC_EXPR
)
8936 /* The signedness is determined from output operand. */
8937 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8938 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8942 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8943 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8946 if (!optab1
|| !optab2
)
8949 vec_mode
= TYPE_MODE (vectype
);
8950 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8951 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8957 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8958 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8961 /* Check if it's a multi-step conversion that can be done using intermediate
8964 prev_type
= vectype
;
8965 prev_mode
= vec_mode
;
8967 if (!CONVERT_EXPR_CODE_P (code
))
8970 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8971 intermediate steps in promotion sequence. We try
8972 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8974 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8975 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8977 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8979 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8980 TYPE_UNSIGNED (prev_type
));
8981 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8982 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8984 if (!optab3
|| !optab4
8985 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8986 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8987 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8988 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8989 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8990 == CODE_FOR_nothing
)
8991 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8992 == CODE_FOR_nothing
))
8995 interm_types
->quick_push (intermediate_type
);
8996 (*multi_step_cvt
)++;
8998 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8999 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
9002 prev_type
= intermediate_type
;
9003 prev_mode
= intermediate_mode
;
9006 interm_types
->release ();
9011 /* Function supportable_narrowing_operation
9013 Check whether an operation represented by the code CODE is a
9014 narrowing operation that is supported by the target platform in
9015 vector form (i.e., when operating on arguments of type VECTYPE_IN
9016 and producing a result of type VECTYPE_OUT).
9018 Narrowing operations we currently support are NOP (CONVERT) and
9019 FIX_TRUNC. This function checks if these operations are supported by
9020 the target platform directly via vector tree-codes.
9023 - CODE1 is the code of a vector operation to be used when
9024 vectorizing the operation, if available.
9025 - MULTI_STEP_CVT determines the number of required intermediate steps in
9026 case of multi-step conversion (like int->short->char - in that case
9027 MULTI_STEP_CVT will be 1).
9028 - INTERM_TYPES contains the intermediate type required to perform the
9029 narrowing operation (short in the above example). */
9032 supportable_narrowing_operation (enum tree_code code
,
9033 tree vectype_out
, tree vectype_in
,
9034 enum tree_code
*code1
, int *multi_step_cvt
,
9035 vec
<tree
> *interm_types
)
9037 machine_mode vec_mode
;
9038 enum insn_code icode1
;
9039 optab optab1
, interm_optab
;
9040 tree vectype
= vectype_in
;
9041 tree narrow_vectype
= vectype_out
;
9043 tree intermediate_type
;
9044 machine_mode intermediate_mode
, prev_mode
;
9048 *multi_step_cvt
= 0;
9052 c1
= VEC_PACK_TRUNC_EXPR
;
9055 case FIX_TRUNC_EXPR
:
9056 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
9060 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9061 tree code and optabs used for computing the operation. */
9068 if (code
== FIX_TRUNC_EXPR
)
9069 /* The signedness is determined from output operand. */
9070 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
9072 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
9077 vec_mode
= TYPE_MODE (vectype
);
9078 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
9083 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9086 /* Check if it's a multi-step conversion that can be done using intermediate
9088 prev_mode
= vec_mode
;
9089 if (code
== FIX_TRUNC_EXPR
)
9090 uns
= TYPE_UNSIGNED (vectype_out
);
9092 uns
= TYPE_UNSIGNED (vectype
);
9094 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9095 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9096 costly than signed. */
9097 if (code
== FIX_TRUNC_EXPR
&& uns
)
9099 enum insn_code icode2
;
9102 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
9104 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
9105 if (interm_optab
!= unknown_optab
9106 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
9107 && insn_data
[icode1
].operand
[0].mode
9108 == insn_data
[icode2
].operand
[0].mode
)
9111 optab1
= interm_optab
;
9116 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9117 intermediate steps in promotion sequence. We try
9118 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9119 interm_types
->create (MAX_INTERM_CVT_STEPS
);
9120 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
9122 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
9124 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
9126 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
9129 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
9130 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
9131 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
9132 == CODE_FOR_nothing
))
9135 interm_types
->quick_push (intermediate_type
);
9136 (*multi_step_cvt
)++;
9138 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
9141 prev_mode
= intermediate_mode
;
9142 optab1
= interm_optab
;
9145 interm_types
->release ();