/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
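
/* For illustration only: a caller that wants to cost NCOPIES copies of a
   simple vector statement during analysis (ncopies and stmt_info are
   assumed to be in scope) might do roughly

     stmt_vector_for_cost body_cost_vec;
     body_cost_vec.create (0);
     unsigned inside_cost
       = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                           stmt_info, 0, vect_body);

   whereas passing a NULL cost vector routes the cost directly to the
   target's add_stmt_cost hook instead of saving it for later.  */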
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
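
/* For example, for an array of V4SI vectors named "vect_array" and N == 2,
   the statement generated above is conceptually

     vect_name_3 = vect_array[2];

   i.e. a plain ARRAY_REF load whose LHS is the fresh SSA name returned to
   the caller.  (The concrete names are illustrative only.)  */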
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple *use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
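
/* For instance, in a loop like

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;      <-- has a vdef, so *relevant is set
         s = a[i];
       }
     ... = s;                  <-- use after the loop makes the def live

   the store is marked relevant because it alters memory, and the
   definition of "s" is marked live because it is used after the loop
   (through a loop-closed-SSA exit phi).  The example is only
   illustrative.  */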
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
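
/* As an illustration of the propagation above: for a simple reduction

     sum_1 = PHI <sum_0, sum_2>
     ...
     sum_2 = sum_1 + a[i];

   the phi is seeded into the worklist as relevant, and processing its uses
   marks the add (and, transitively, the load feeding it) as
   vect_used_by_reduction rather than vect_used_in_scope, which is what
   later permits reordering of the partial results.  (Sketch only; the SSA
   names are illustrative.)  */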
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
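
/* For example, with ncopies == 2 and one constant operand (dt[1] ==
   vect_constant_def), the code above records one vector_stmt in the
   prologue (building the invariant vector once) and two vector_stmt
   entries in the loop body, so on a target where vector_stmt costs 1 this
   yields prologue_cost = 1 and inside_cost = 2.  (Numbers are
   illustrative; the actual weights come from the target cost hooks.)  */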
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
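
/* For instance, a two-step promotion (pwr == 1) iterates i = 0,1 with
   tmp = i + 1, so it accounts for vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 vec_promote_demote operations, matching the rule above
   that each additional step doubles the number of instructions.
   (Illustrative arithmetic only.)  */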
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
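
/* As a concrete example of the permute accounting above: storing a group
   of 4 interleaved elements (group_size == 4) with ncopies == 1 records
   ceil_log2 (4) * 4 = 8 vec_perm operations in the loop body, in addition
   to the vector stores costed by vect_get_store_cost.  (Illustrative; the
   weight of each vec_perm comes from the target cost model.)  */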
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");
        break;
      }

    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
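
/* For example, initializing a V4SI invariant from a scalar SSA name x_5
   conceptually emits, in the loop preheader,

     vect_cst_.6 = {x_5, x_5, x_5, x_5};

   and the SSA name holding the vector constant is what the caller then
   uses inside the vectorized loop.  (Names are illustrative.)  */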
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  gimple *def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  bool is_simple_use;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      {
        tree vector_type;

        if (vectype)
          vector_type = vectype;
        else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
                 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
          vector_type = build_same_sized_truth_vector_type (stmt_vectype);
        else
          vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

        gcc_assert (vector_type);
        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
   To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

   To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

   For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
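
/* As an illustration of the virtual-operand update above: inserting a
   vector store right before a scalar store that uses .MEM_3 roughly turns

     # .MEM_4 = VDEF <.MEM_3>
     a[i_1] = x_2;

   into

     # .MEM_7 = VDEF <.MEM_3>
     MEM[(int *)vectp_a.5_6] = vect_x;
     # .MEM_4 = VDEF <.MEM_7>
     a[i_1] = x_2;

   so the SSA renamer does not have to be run.  (Names are illustrative.)  */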
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
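
/* For example, a call to __builtin_sqrtf in the loop maps to CFN_SQRT; if
   the target supports a vector square root for the chosen vector mode,
   the check above returns IFN_SQRT and the call can be vectorized as an
   internal-function call rather than a target builtin.  (Illustrative;
   the exact set of supported functions is target-dependent.)  */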
static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree rhs_vectype = NULL_TREE;
  tree mask_vectype;
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);

  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
    return false;

  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
        return false;
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
                                          TYPE_MODE (mask_vectype),
                                          !is_store)
           || (rhs_vectype
               && !useless_type_conversion_p (vectype, rhs_vectype)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /* Transform.  */
1848 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
1850 tree vec_oprnd0
= NULL_TREE
, op
;
1851 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1852 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1853 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1854 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1855 tree mask_perm_mask
= NULL_TREE
;
1856 edge pe
= loop_preheader_edge (loop
);
1859 enum { NARROW
, NONE
, WIDEN
} modifier
;
1860 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1862 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1863 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1864 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1865 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1866 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1867 scaletype
= TREE_VALUE (arglist
);
1868 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1869 && types_compatible_p (srctype
, masktype
));
1871 if (nunits
== gather_off_nunits
)
1873 else if (nunits
== gather_off_nunits
/ 2)
1875 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1878 for (i
= 0; i
< gather_off_nunits
; ++i
)
1879 sel
[i
] = i
| nunits
;
1881 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1883 else if (nunits
== gather_off_nunits
* 2)
1885 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1888 for (i
= 0; i
< nunits
; ++i
)
1889 sel
[i
] = i
< gather_off_nunits
1890 ? i
: i
+ nunits
- gather_off_nunits
;
1892 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1894 for (i
= 0; i
< nunits
; ++i
)
1895 sel
[i
] = i
| gather_off_nunits
;
1896 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1901 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1903 ptr
= fold_convert (ptrtype
, gather_base
);
1904 if (!is_gimple_min_invariant (ptr
))
1906 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1907 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1908 gcc_assert (!new_bb
);
1911 scale
= build_int_cst (scaletype
, gather_scale
);
1913 prev_stmt_info
= NULL
;
1914 for (j
= 0; j
< ncopies
; ++j
)
1916 if (modifier
== WIDEN
&& (j
& 1))
1917 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1918 perm_mask
, stmt
, gsi
);
1921 = vect_get_vec_def_for_operand (gather_off
, stmt
);
1924 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1926 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1928 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1929 == TYPE_VECTOR_SUBPARTS (idxtype
));
1930 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
1931 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1933 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1934 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1938 if (mask_perm_mask
&& (j
& 1))
1939 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1940 mask_perm_mask
, stmt
, gsi
);
1944 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
);
1947 vect_is_simple_use (vec_mask
, loop_vinfo
, &def_stmt
, &dt
);
1948 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1952 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1955 == TYPE_VECTOR_SUBPARTS (masktype
));
1956 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
1957 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1959 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
1960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1966 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1969 if (!useless_type_conversion_p (vectype
, rettype
))
1971 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1972 == TYPE_VECTOR_SUBPARTS (rettype
));
1973 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
1974 gimple_call_set_lhs (new_stmt
, op
);
1975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1976 var
= make_ssa_name (vec_dest
);
1977 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
1978 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1982 var
= make_ssa_name (vec_dest
, new_stmt
);
1983 gimple_call_set_lhs (new_stmt
, var
);
1986 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1988 if (modifier
== NARROW
)
1995 var
= permute_vec_elements (prev_res
, var
,
1996 perm_mask
, stmt
, gsi
);
1997 new_stmt
= SSA_NAME_DEF_STMT (var
);
2000 if (prev_stmt_info
== NULL
)
2001 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2003 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2004 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
	{
	  stmt = STMT_VINFO_RELATED_STMT (stmt_info);
	  stmt_info = vinfo_for_stmt (stmt);
	}
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
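  /* For an IFN_MASK_STORE one IFN_MASK_STORE internal call is emitted per
     copy below; the known alignment of the data reference is encoded in the
     second argument of the internal call.  */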
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      tree rhs = gimple_call_arg (stmt, 3);
	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt);
	      /* We should have caught mismatched types earlier.  */
	      gcc_assert (useless_type_conversion_p (vectype,
						     TREE_TYPE (vec_rhs)));
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false, &inv_p);
	      gcc_assert (!inv_p);
	    }
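	  /* For the copies after the first one the vector operands are the
	     copies of the previous definitions and the data reference
	     pointer is advanced by one vector size.  */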
	  else
	    {
	      vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
	      vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }

	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
				    misalign ? misalign & -misalign : align);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
					  ptr, vec_mask, vec_rhs);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
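  /* The IFN_MASK_LOAD case mirrors the store case: one internal call per
     copy, with the result written to a fresh SSA name of VEC_DEST.  */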
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
	{
	  unsigned align, misalign;

	  if (i == 0)
	    {
	      vec_mask = vect_get_vec_def_for_operand (mask, stmt);
	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
						      NULL_TREE, &dummy, gsi,
						      &ptr_incr, false, &inv_p);
	      gcc_assert (!inv_p);
	    }
	  else
	    {
	      vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					     TYPE_SIZE_UNIT (vectype));
	    }
	  align = TYPE_ALIGN_UNIT (vectype);
	  if (aligned_access_p (dr))
	    misalign = 0;
	  else if (DR_MISALIGNMENT (dr) == -1)
	    {
	      align = TYPE_ALIGN_UNIT (elem_type);
	      misalign = 0;
	    }
	  else
	    misalign = DR_MISALIGNMENT (dr);
	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
				  misalign);
	  tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
				    misalign ? misalign & -misalign : align);
	  new_stmt
	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
					  ptr, vec_mask);
	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (i == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
	 from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
	{
	  stmt = STMT_VINFO_RELATED_STMT (stmt_info);
	  stmt_info = vinfo_for_stmt (stmt);
	}
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt,
					&interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
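/* This is used by vectorizable_call below for internal-function calls whose
   vector result has wider elements than the scalar statement requires: two
   half-width results are produced and then combined with the pack code
   returned in *CONVERT_CODE.  */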
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		   slp_tree slp_node)
{
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
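  /* The early checks below reject statements that are not relevant to the
     vectorization, dispatch IFN_MASK_LOAD/IFN_MASK_STORE calls to
     vectorizable_mask_load_store, and require the call to have an SSA_NAME
     lhs and at most three arguments.  */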
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?   */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
					 slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }
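  /* All arguments must be of the same type and be simple uses; the vector
     type of the arguments determines VECTYPE_IN, falling back to a vector
     type of the same size as VECTYPE_OUT when all arguments are external
     or constant defs.  */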
2255 for (i
= 0; i
< nargs
; i
++)
2259 op
= gimple_call_arg (stmt
, i
);
2261 /* We can only handle calls with arguments of the same type. */
2263 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2265 if (dump_enabled_p ())
2266 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2267 "argument types differ.\n");
2271 rhs_type
= TREE_TYPE (op
);
2273 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &dt
[i
], &opvectype
))
2275 if (dump_enabled_p ())
2276 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2277 "use not simple.\n");
2282 vectype_in
= opvectype
;
2284 && opvectype
!= vectype_in
)
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2288 "argument vector types differ.\n");
2292 /* If all arguments are external or constant defs use a vector type with
2293 the same size as the output vector type. */
2295 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2297 gcc_assert (vectype_in
);
2300 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2303 "no vectype for scalar type ");
2304 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2305 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2312 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2313 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2314 if (nunits_in
== nunits_out
/ 2)
2316 else if (nunits_out
== nunits_in
)
2318 else if (nunits_out
== nunits_in
/ 2)
2323 /* We only handle functions that do not read or clobber memory. */
2324 if (gimple_vuse (stmt
))
2326 if (dump_enabled_p ())
2327 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2328 "function reads from or writes to memory.\n");
2332 /* For now, we only vectorize functions if a target specific builtin
2333 is available. TODO -- in some cases, it might be profitable to
2334 insert the calls for pieces of the vector, in order to be able
2335 to vectorize other operations in the loop. */
2337 internal_fn ifn
= IFN_LAST
;
2338 combined_fn cfn
= gimple_call_combined_fn (stmt
);
2339 tree callee
= gimple_call_fndecl (stmt
);
2341 /* First try using an internal function. */
2342 tree_code convert_code
= ERROR_MARK
;
2344 && (modifier
== NONE
2345 || (modifier
== NARROW
2346 && simple_integer_narrowing (vectype_out
, vectype_in
,
2348 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
2351 /* If that fails, try asking for a target-specific built-in function. */
2352 if (ifn
== IFN_LAST
)
2354 if (cfn
!= CFN_LAST
)
2355 fndecl
= targetm
.vectorize
.builtin_vectorized_function
2356 (cfn
, vectype_out
, vectype_in
);
2358 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
2359 (callee
, vectype_out
, vectype_in
);
2362 if (ifn
== IFN_LAST
&& !fndecl
)
2364 if (cfn
== CFN_GOMP_SIMD_LANE
2367 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2368 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2369 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2370 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2372 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2373 { 0, 1, 2, ... vf - 1 } vector. */
2374 gcc_assert (nargs
== 0);
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2380 "function is not vectorizable.\n");
2385 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2387 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
2388 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2390 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2392 /* Sanity check: make sure that at least one copy of the vectorized stmt
2393 needs to be generated. */
2394 gcc_assert (ncopies
>= 1);
2396 if (!vec_stmt
) /* transformation not required. */
2398 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2399 if (dump_enabled_p ())
2400 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2402 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2403 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
2404 add_stmt_cost (stmt_info
->vinfo
->target_cost_data
, ncopies
/ 2,
2405 vec_promote_demote
, stmt_info
, 0, vect_body
);
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2416 scalar_dest
= gimple_call_lhs (stmt
);
2417 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2419 prev_stmt_info
= NULL
;
2420 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
2422 tree prev_res
= NULL_TREE
;
2423 for (j
= 0; j
< ncopies
; ++j
)
2425 /* Build argument list for the vectorized call. */
2427 vargs
.create (nargs
);
2433 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2434 vec
<tree
> vec_oprnds0
;
2436 for (i
= 0; i
< nargs
; i
++)
2437 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2438 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2439 vec_oprnds0
= vec_defs
[0];
2441 /* Arguments are ready. Create the new vector stmt. */
2442 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2445 for (k
= 0; k
< nargs
; k
++)
2447 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2448 vargs
[k
] = vec_oprndsk
[i
];
2450 if (modifier
== NARROW
)
2452 tree half_res
= make_ssa_name (vectype_in
);
2453 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2454 gimple_call_set_lhs (new_stmt
, half_res
);
2455 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2458 prev_res
= half_res
;
2461 new_temp
= make_ssa_name (vec_dest
);
2462 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2463 prev_res
, half_res
);
2467 if (ifn
!= IFN_LAST
)
2468 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2470 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2471 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2472 gimple_call_set_lhs (new_stmt
, new_temp
);
2474 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2475 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2478 for (i
= 0; i
< nargs
; i
++)
2480 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2481 vec_oprndsi
.release ();
2486 for (i
= 0; i
< nargs
; i
++)
2488 op
= gimple_call_arg (stmt
, i
);
2491 = vect_get_vec_def_for_operand (op
, stmt
);
2494 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2496 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2499 vargs
.quick_push (vec_oprnd0
);
2502 if (gimple_call_internal_p (stmt
)
2503 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2505 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2507 for (k
= 0; k
< nunits_out
; ++k
)
2508 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2509 tree cst
= build_vector (vectype_out
, v
);
2511 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
2512 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
2513 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2514 new_temp
= make_ssa_name (vec_dest
);
2515 new_stmt
= gimple_build_assign (new_temp
, new_var
);
2517 else if (modifier
== NARROW
)
2519 tree half_res
= make_ssa_name (vectype_in
);
2520 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2521 gimple_call_set_lhs (new_stmt
, half_res
);
2522 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2525 prev_res
= half_res
;
2528 new_temp
= make_ssa_name (vec_dest
);
2529 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
2530 prev_res
, half_res
);
2534 if (ifn
!= IFN_LAST
)
2535 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2537 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2538 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2539 gimple_call_set_lhs (new_stmt
, new_temp
);
2541 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2543 if (j
== (modifier
== NARROW
? 1 : 0))
2544 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2546 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2548 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2551 else if (modifier
== NARROW
)
2553 for (j
= 0; j
< ncopies
; ++j
)
2555 /* Build argument list for the vectorized call. */
2557 vargs
.create (nargs
* 2);
2563 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2564 vec
<tree
> vec_oprnds0
;
2566 for (i
= 0; i
< nargs
; i
++)
2567 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2568 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2569 vec_oprnds0
= vec_defs
[0];
2571 /* Arguments are ready. Create the new vector stmt. */
2572 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2576 for (k
= 0; k
< nargs
; k
++)
2578 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2579 vargs
.quick_push (vec_oprndsk
[i
]);
2580 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2582 if (ifn
!= IFN_LAST
)
2583 new_stmt
= gimple_build_call_internal_vec (ifn
, vargs
);
2585 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2586 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2587 gimple_call_set_lhs (new_stmt
, new_temp
);
2588 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2589 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2592 for (i
= 0; i
< nargs
; i
++)
2594 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2595 vec_oprndsi
.release ();
2600 for (i
= 0; i
< nargs
; i
++)
2602 op
= gimple_call_arg (stmt
, i
);
2606 = vect_get_vec_def_for_operand (op
, stmt
);
2608 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2612 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2614 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2616 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2619 vargs
.quick_push (vec_oprnd0
);
2620 vargs
.quick_push (vec_oprnd1
);
2623 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2624 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2625 gimple_call_set_lhs (new_stmt
, new_temp
);
2626 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2629 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2631 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2633 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2636 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2639 /* No current target implements this case. */
2644 /* The call in STMT might prevent it from being removed in dce.
2645 We however cannot remove it here, due to the way the ssa name
2646 it defines is mapped to the new definition. So just replace
2647 rhs of the statement with something harmless. */
2652 type
= TREE_TYPE (scalar_dest
);
2653 if (is_pattern_stmt_p (stmt_info
))
2654 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2656 lhs
= gimple_call_lhs (stmt
);
2658 if (gimple_call_internal_p (stmt
)
2659 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2661 /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
2662 with vf - 1 rather than 0, that is the last iteration of the
2664 imm_use_iterator iter
;
2665 use_operand_p use_p
;
2667 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2669 basic_block use_bb
= gimple_bb (use_stmt
);
2671 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), use_bb
))
2673 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2674 SET_USE (use_p
, build_int_cst (TREE_TYPE (lhs
),
2675 ncopies
* nunits_out
- 1));
2676 update_stmt (use_stmt
);
2681 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2682 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2683 set_vinfo_for_stmt (stmt
, NULL
);
2684 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2685 gsi_replace (gsi
, new_stmt
, false);
2691 struct simd_call_arg_info
2695 enum vect_def_type dt
;
2696 HOST_WIDE_INT linear_step
;
2698 bool simd_lane_linear
;
2701 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2702 is linear within simd lane (but not within whole loop), note it in
2706 vect_simd_lane_linear (tree op
, struct loop
*loop
,
2707 struct simd_call_arg_info
*arginfo
)
2709 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
2711 if (!is_gimple_assign (def_stmt
)
2712 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
2713 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
2716 tree base
= gimple_assign_rhs1 (def_stmt
);
2717 HOST_WIDE_INT linear_step
= 0;
2718 tree v
= gimple_assign_rhs2 (def_stmt
);
2719 while (TREE_CODE (v
) == SSA_NAME
)
2722 def_stmt
= SSA_NAME_DEF_STMT (v
);
2723 if (is_gimple_assign (def_stmt
))
2724 switch (gimple_assign_rhs_code (def_stmt
))
2727 t
= gimple_assign_rhs2 (def_stmt
);
2728 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
2730 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
2731 v
= gimple_assign_rhs1 (def_stmt
);
2734 t
= gimple_assign_rhs2 (def_stmt
);
2735 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
2737 linear_step
= tree_to_shwi (t
);
2738 v
= gimple_assign_rhs1 (def_stmt
);
2741 t
= gimple_assign_rhs1 (def_stmt
);
2742 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
2743 || (TYPE_PRECISION (TREE_TYPE (v
))
2744 < TYPE_PRECISION (TREE_TYPE (t
))))
2753 else if (is_gimple_call (def_stmt
)
2754 && gimple_call_internal_p (def_stmt
)
2755 && gimple_call_internal_fn (def_stmt
) == IFN_GOMP_SIMD_LANE
2757 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
2758 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
2763 arginfo
->linear_step
= linear_step
;
2765 arginfo
->simd_lane_linear
= true;
2771 /* Function vectorizable_simd_clone_call.
2773 Check if STMT performs a function call that can be vectorized
2774 by calling a simd clone of the function.
2775 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2776 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2777 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
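/* The clone to call is chosen from NODE->simd_clones by a badness score:
   clones whose simdlen is smaller than the vectorization factor, inbranch
   clones, and clones the target considers less usable are penalized (see
   the loop over the clones below).  */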
2780 vectorizable_simd_clone_call (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
2781 gimple
**vec_stmt
, slp_tree slp_node
)
2786 tree vec_oprnd0
= NULL_TREE
;
2787 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2789 unsigned int nunits
;
2790 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2791 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2792 vec_info
*vinfo
= stmt_info
->vinfo
;
2793 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2794 tree fndecl
, new_temp
;
2796 gimple
*new_stmt
= NULL
;
2798 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2799 vec
<tree
> vargs
= vNULL
;
2801 tree lhs
, rtype
, ratype
;
2802 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2804 /* Is STMT a vectorizable call? */
2805 if (!is_gimple_call (stmt
))
2808 fndecl
= gimple_call_fndecl (stmt
);
2809 if (fndecl
== NULL_TREE
)
2812 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2813 if (node
== NULL
|| node
->simd_clones
== NULL
)
2816 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2819 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
2823 if (gimple_call_lhs (stmt
)
2824 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2827 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2829 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2831 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2835 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2838 /* Process function arguments. */
2839 nargs
= gimple_call_num_args (stmt
);
2841 /* Bail out if the function has zero arguments. */
2845 arginfo
.create (nargs
);
2847 for (i
= 0; i
< nargs
; i
++)
2849 simd_call_arg_info thisarginfo
;
2852 thisarginfo
.linear_step
= 0;
2853 thisarginfo
.align
= 0;
2854 thisarginfo
.op
= NULL_TREE
;
2855 thisarginfo
.simd_lane_linear
= false;
2857 op
= gimple_call_arg (stmt
, i
);
2858 if (!vect_is_simple_use (op
, vinfo
, &def_stmt
, &thisarginfo
.dt
,
2859 &thisarginfo
.vectype
)
2860 || thisarginfo
.dt
== vect_uninitialized_def
)
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2864 "use not simple.\n");
2869 if (thisarginfo
.dt
== vect_constant_def
2870 || thisarginfo
.dt
== vect_external_def
)
2871 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2873 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2875 /* For linear arguments, the analyze phase should have saved
2876 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2877 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2878 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
2880 gcc_assert (vec_stmt
);
2881 thisarginfo
.linear_step
2882 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
2884 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
2885 thisarginfo
.simd_lane_linear
2886 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
2887 == boolean_true_node
);
2888 /* If loop has been peeled for alignment, we need to adjust it. */
2889 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2890 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2891 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
2893 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2894 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
2895 tree opt
= TREE_TYPE (thisarginfo
.op
);
2896 bias
= fold_convert (TREE_TYPE (step
), bias
);
2897 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2899 = fold_build2 (POINTER_TYPE_P (opt
)
2900 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2901 thisarginfo
.op
, bias
);
2905 && thisarginfo
.dt
!= vect_constant_def
2906 && thisarginfo
.dt
!= vect_external_def
2908 && TREE_CODE (op
) == SSA_NAME
2909 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2911 && tree_fits_shwi_p (iv
.step
))
2913 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2914 thisarginfo
.op
= iv
.base
;
2916 else if ((thisarginfo
.dt
== vect_constant_def
2917 || thisarginfo
.dt
== vect_external_def
)
2918 && POINTER_TYPE_P (TREE_TYPE (op
)))
2919 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2920 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2922 if (POINTER_TYPE_P (TREE_TYPE (op
))
2923 && !thisarginfo
.linear_step
2925 && thisarginfo
.dt
!= vect_constant_def
2926 && thisarginfo
.dt
!= vect_external_def
2929 && TREE_CODE (op
) == SSA_NAME
)
2930 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
2932 arginfo
.quick_push (thisarginfo
);
2935 unsigned int badness
= 0;
2936 struct cgraph_node
*bestn
= NULL
;
2937 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2938 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2940 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2941 n
= n
->simdclone
->next_clone
)
2943 unsigned int this_badness
= 0;
2944 if (n
->simdclone
->simdlen
2945 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2946 || n
->simdclone
->nargs
!= nargs
)
2948 if (n
->simdclone
->simdlen
2949 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2950 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2951 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2952 if (n
->simdclone
->inbranch
)
2953 this_badness
+= 2048;
2954 int target_badness
= targetm
.simd_clone
.usable (n
);
2955 if (target_badness
< 0)
2957 this_badness
+= target_badness
* 512;
2958 /* FORNOW: Have to add code to add the mask argument. */
2959 if (n
->simdclone
->inbranch
)
2961 for (i
= 0; i
< nargs
; i
++)
2963 switch (n
->simdclone
->args
[i
].arg_type
)
2965 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2966 if (!useless_type_conversion_p
2967 (n
->simdclone
->args
[i
].orig_type
,
2968 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2970 else if (arginfo
[i
].dt
== vect_constant_def
2971 || arginfo
[i
].dt
== vect_external_def
2972 || arginfo
[i
].linear_step
)
2975 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2976 if (arginfo
[i
].dt
!= vect_constant_def
2977 && arginfo
[i
].dt
!= vect_external_def
)
2980 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
2982 if (arginfo
[i
].dt
== vect_constant_def
2983 || arginfo
[i
].dt
== vect_external_def
2984 || (arginfo
[i
].linear_step
2985 != n
->simdclone
->args
[i
].linear_step
))
2988 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2989 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
2990 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
2991 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
2992 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
2993 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
2997 case SIMD_CLONE_ARG_TYPE_MASK
:
3000 if (i
== (size_t) -1)
3002 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3007 if (arginfo
[i
].align
)
3008 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3009 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3011 if (i
== (size_t) -1)
3013 if (bestn
== NULL
|| this_badness
< badness
)
3016 badness
= this_badness
;
3026 for (i
= 0; i
< nargs
; i
++)
3027 if ((arginfo
[i
].dt
== vect_constant_def
3028 || arginfo
[i
].dt
== vect_external_def
)
3029 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3032 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3034 if (arginfo
[i
].vectype
== NULL
3035 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3036 > bestn
->simdclone
->simdlen
))
3043 fndecl
= bestn
->decl
;
3044 nunits
= bestn
->simdclone
->simdlen
;
3045 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3047 /* If the function isn't const, only allow it in simd loops where user
3048 has asserted that at least nunits consecutive iterations can be
3049 performed using SIMD instructions. */
3050 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3051 && gimple_vuse (stmt
))
3057 /* Sanity check: make sure that at least one copy of the vectorized stmt
3058 needs to be generated. */
3059 gcc_assert (ncopies
>= 1);
3061 if (!vec_stmt
) /* transformation not required. */
3063 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3064 for (i
= 0; i
< nargs
; i
++)
3065 if (bestn
->simdclone
->args
[i
].arg_type
3066 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3068 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3070 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3071 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3072 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3073 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3074 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3075 tree sll
= arginfo
[i
].simd_lane_linear
3076 ? boolean_true_node
: boolean_false_node
;
3077 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3079 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3080 if (dump_enabled_p ())
3081 dump_printf_loc (MSG_NOTE
, vect_location
,
3082 "=== vectorizable_simd_clone_call ===\n");
3083 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3094 scalar_dest
= gimple_call_lhs (stmt
);
3095 vec_dest
= NULL_TREE
;
3100 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3101 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
3102 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
3105 rtype
= TREE_TYPE (ratype
);
3109 prev_stmt_info
= NULL
;
3110 for (j
= 0; j
< ncopies
; ++j
)
3112 /* Build argument list for the vectorized call. */
3114 vargs
.create (nargs
);
3118 for (i
= 0; i
< nargs
; i
++)
3120 unsigned int k
, l
, m
, o
;
3122 op
= gimple_call_arg (stmt
, i
);
3123 switch (bestn
->simdclone
->args
[i
].arg_type
)
3125 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3126 atype
= bestn
->simdclone
->args
[i
].vector_type
;
3127 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
3128 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
3130 if (TYPE_VECTOR_SUBPARTS (atype
)
3131 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
3133 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
3134 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
3135 / TYPE_VECTOR_SUBPARTS (atype
));
3136 gcc_assert ((k
& (k
- 1)) == 0);
3139 = vect_get_vec_def_for_operand (op
, stmt
);
3142 vec_oprnd0
= arginfo
[i
].op
;
3143 if ((m
& (k
- 1)) == 0)
3145 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3148 arginfo
[i
].op
= vec_oprnd0
;
3150 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
3152 bitsize_int ((m
& (k
- 1)) * prec
));
3154 = gimple_build_assign (make_ssa_name (atype
),
3156 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3157 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3161 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3162 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3163 gcc_assert ((k
& (k
- 1)) == 0);
3164 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3166 vec_alloc (ctor_elts
, k
);
3169 for (l
= 0; l
< k
; l
++)
3171 if (m
== 0 && l
== 0)
3173 = vect_get_vec_def_for_operand (op
, stmt
);
3176 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3178 arginfo
[i
].op
= vec_oprnd0
;
3181 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3185 vargs
.safe_push (vec_oprnd0
);
3188 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3190 = gimple_build_assign (make_ssa_name (atype
),
3192 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3193 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3198 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3199 vargs
.safe_push (op
);
3201 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3206 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3211 edge pe
= loop_preheader_edge (loop
);
3212 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3213 gcc_assert (!new_bb
);
3215 if (arginfo
[i
].simd_lane_linear
)
3217 vargs
.safe_push (arginfo
[i
].op
);
3220 tree phi_res
= copy_ssa_name (op
);
3221 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3222 set_vinfo_for_stmt (new_phi
,
3223 new_stmt_vec_info (new_phi
, loop_vinfo
));
3224 add_phi_arg (new_phi
, arginfo
[i
].op
,
3225 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3227 = POINTER_TYPE_P (TREE_TYPE (op
))
3228 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3229 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3230 ? sizetype
: TREE_TYPE (op
);
3232 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3234 tree tcst
= wide_int_to_tree (type
, cst
);
3235 tree phi_arg
= copy_ssa_name (op
);
3237 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3238 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3239 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3240 set_vinfo_for_stmt (new_stmt
,
3241 new_stmt_vec_info (new_stmt
, loop_vinfo
));
3242 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3244 arginfo
[i
].op
= phi_res
;
3245 vargs
.safe_push (phi_res
);
3250 = POINTER_TYPE_P (TREE_TYPE (op
))
3251 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3252 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3253 ? sizetype
: TREE_TYPE (op
);
3255 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3257 tree tcst
= wide_int_to_tree (type
, cst
);
3258 new_temp
= make_ssa_name (TREE_TYPE (op
));
3259 new_stmt
= gimple_build_assign (new_temp
, code
,
3260 arginfo
[i
].op
, tcst
);
3261 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3262 vargs
.safe_push (new_temp
);
3265 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3266 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3267 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3268 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3274 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3277 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3279 new_temp
= create_tmp_var (ratype
);
3280 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3281 == TYPE_VECTOR_SUBPARTS (rtype
))
3282 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3284 new_temp
= make_ssa_name (rtype
, new_stmt
);
3285 gimple_call_set_lhs (new_stmt
, new_temp
);
3287 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3291 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3294 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3295 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3296 gcc_assert ((k
& (k
- 1)) == 0);
3297 for (l
= 0; l
< k
; l
++)
3302 t
= build_fold_addr_expr (new_temp
);
3303 t
= build2 (MEM_REF
, vectype
, t
,
3304 build_int_cst (TREE_TYPE (t
),
3305 l
* prec
/ BITS_PER_UNIT
));
3308 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3309 size_int (prec
), bitsize_int (l
* prec
));
3311 = gimple_build_assign (make_ssa_name (vectype
), t
);
3312 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3313 if (j
== 0 && l
== 0)
3314 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3316 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3318 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3323 tree clobber
= build_constructor (ratype
, NULL
);
3324 TREE_THIS_VOLATILE (clobber
) = 1;
3325 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3326 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3330 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3332 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3333 / TYPE_VECTOR_SUBPARTS (rtype
));
3334 gcc_assert ((k
& (k
- 1)) == 0);
3335 if ((j
& (k
- 1)) == 0)
3336 vec_alloc (ret_ctor_elts
, k
);
3339 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3340 for (m
= 0; m
< o
; m
++)
3342 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3343 size_int (m
), NULL_TREE
, NULL_TREE
);
3345 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3346 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3347 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3348 gimple_assign_lhs (new_stmt
));
3350 tree clobber
= build_constructor (ratype
, NULL
);
3351 TREE_THIS_VOLATILE (clobber
) = 1;
3352 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3353 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3356 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3357 if ((j
& (k
- 1)) != k
- 1)
3359 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3361 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3362 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3364 if ((unsigned) j
== k
- 1)
3365 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3367 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3369 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3374 tree t
= build_fold_addr_expr (new_temp
);
3375 t
= build2 (MEM_REF
, vectype
, t
,
3376 build_int_cst (TREE_TYPE (t
), 0));
3378 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3379 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3380 tree clobber
= build_constructor (ratype
, NULL
);
3381 TREE_THIS_VOLATILE (clobber
) = 1;
3382 vect_finish_stmt_generation (stmt
,
3383 gimple_build_assign (new_temp
,
3389 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3391 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3393 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3398 /* The call in STMT might prevent it from being removed in dce.
3399 We however cannot remove it here, due to the way the ssa name
3400 it defines is mapped to the new definition. So just replace
3401 rhs of the statement with something harmless. */
3408 type
= TREE_TYPE (scalar_dest
);
3409 if (is_pattern_stmt_p (stmt_info
))
3410 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3412 lhs
= gimple_call_lhs (stmt
);
3413 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3416 new_stmt
= gimple_build_nop ();
3417 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3418 set_vinfo_for_stmt (stmt
, NULL
);
3419 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3420 gsi_replace (gsi
, new_stmt
, true);
3421 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
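/* vect_create_vectorized_promotion_stmts below calls this twice per input
   vector, once with CODE1/DECL1 and once with CODE2/DECL2, to produce the
   two halves of the widened result.  */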
3473 /* Get vectorized definitions for loop-based vectorization. For the first
3474 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3475 scalar operand), and for the rest we get a copy with
3476 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3477 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3478 The vectors are collected into VEC_OPRNDS. */
3481 vect_get_loop_based_defs (tree
*oprnd
, gimple
*stmt
, enum vect_def_type dt
,
3482 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3486 /* Get first vector operand. */
3487 /* All the vector operands except the very first one (that is scalar oprnd)
3489 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3490 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
);
3492 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3494 vec_oprnds
->quick_push (vec_oprnd
);
3496 /* Get second vector operand. */
3497 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3498 vec_oprnds
->quick_push (vec_oprnd
);
3502 /* For conversion in multiple steps, continue to get operands
3505 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3509 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3510 For multi-step conversions store the resulting vectors and call the function
3514 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3515 int multi_step_cvt
, gimple
*stmt
,
3517 gimple_stmt_iterator
*gsi
,
3518 slp_tree slp_node
, enum tree_code code
,
3519 stmt_vec_info
*prev_stmt_info
)
3522 tree vop0
, vop1
, new_tmp
, vec_dest
;
3524 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3526 vec_dest
= vec_dsts
.pop ();
3528 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3530 /* Create demotion operation. */
3531 vop0
= (*vec_oprnds
)[i
];
3532 vop1
= (*vec_oprnds
)[i
+ 1];
3533 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3534 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3535 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3536 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3539 /* Store the resulting vector for next recursive call. */
3540 (*vec_oprnds
)[i
/2] = new_tmp
;
3543 /* This is the last step of the conversion sequence. Store the
3544 vectors in SLP_NODE or in vector info of the scalar statement
3545 (or in STMT_VINFO_RELATED_STMT chain). */
3547 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3550 if (!*prev_stmt_info
)
3551 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3553 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3555 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3560 /* For multi-step demotion operations we first generate demotion operations
3561 from the source type to the intermediate types, and then combine the
3562 results (stored in VEC_OPRNDS) in demotion operation to the destination
3566 /* At each level of recursion we have half of the operands we had at the
3568 vec_oprnds
->truncate ((i
+1)/2);
3569 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3570 stmt
, vec_dsts
, gsi
, slp_node
,
3571 VEC_PACK_TRUNC_EXPR
,
3575 vec_dsts
.quick_push (vec_dest
);
3579 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3580 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3581 the resulting vectors and call the function recursively. */
3584 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3585 vec
<tree
> *vec_oprnds1
,
3586 gimple
*stmt
, tree vec_dest
,
3587 gimple_stmt_iterator
*gsi
,
3588 enum tree_code code1
,
3589 enum tree_code code2
, tree decl1
,
3590 tree decl2
, int op_type
)
3593 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3594 gimple
*new_stmt1
, *new_stmt2
;
3595 vec
<tree
> vec_tmp
= vNULL
;
3597 vec_tmp
.create (vec_oprnds0
->length () * 2);
3598 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3600 if (op_type
== binary_op
)
3601 vop1
= (*vec_oprnds1
)[i
];
3605 /* Generate the two halves of promotion operation. */
3606 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3607 op_type
, vec_dest
, gsi
, stmt
);
3608 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3609 op_type
, vec_dest
, gsi
, stmt
);
3610 if (is_gimple_call (new_stmt1
))
3612 new_tmp1
= gimple_call_lhs (new_stmt1
);
3613 new_tmp2
= gimple_call_lhs (new_stmt2
);
3617 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3618 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3621 /* Store the results for the next step. */
3622 vec_tmp
.quick_push (new_tmp1
);
3623 vec_tmp
.quick_push (new_tmp2
);
3626 vec_oprnds0
->release ();
3627 *vec_oprnds0
= vec_tmp
;
3631 /* Check if STMT performs a conversion operation, that can be vectorized.
3632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3637 vectorizable_conversion (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
3638 gimple
**vec_stmt
, slp_tree slp_node
)
3642 tree op0
, op1
= NULL_TREE
;
3643 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3644 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3645 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3646 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3647 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3648 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3651 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3652 gimple
*new_stmt
= NULL
;
3653 stmt_vec_info prev_stmt_info
;
3656 tree vectype_out
, vectype_in
;
3658 tree lhs_type
, rhs_type
;
3659 enum { NARROW
, NONE
, WIDEN
} modifier
;
3660 vec
<tree
> vec_oprnds0
= vNULL
;
3661 vec
<tree
> vec_oprnds1
= vNULL
;
3663 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3664 vec_info
*vinfo
= stmt_info
->vinfo
;
3665 int multi_step_cvt
= 0;
3666 vec
<tree
> vec_dsts
= vNULL
;
3667 vec
<tree
> interm_types
= vNULL
;
3668 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3670 machine_mode rhs_mode
;
3671 unsigned short fltsz
;
3673 /* Is STMT a vectorizable conversion? */
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3678 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3682 if (!is_gimple_assign (stmt
))
3685 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3688 code
= gimple_assign_rhs_code (stmt
);
3689 if (!CONVERT_EXPR_CODE_P (code
)
3690 && code
!= FIX_TRUNC_EXPR
3691 && code
!= FLOAT_EXPR
3692 && code
!= WIDEN_MULT_EXPR
3693 && code
!= WIDEN_LSHIFT_EXPR
)
3696 op_type
= TREE_CODE_LENGTH (code
);
3698 /* Check types of lhs and rhs. */
3699 scalar_dest
= gimple_assign_lhs (stmt
);
3700 lhs_type
= TREE_TYPE (scalar_dest
);
3701 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3703 op0
= gimple_assign_rhs1 (stmt
);
3704 rhs_type
= TREE_TYPE (op0
);
3706 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3707 && !((INTEGRAL_TYPE_P (lhs_type
)
3708 && INTEGRAL_TYPE_P (rhs_type
))
3709 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3710 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3713 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3714 && ((INTEGRAL_TYPE_P (lhs_type
)
3715 && (TYPE_PRECISION (lhs_type
)
3716 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3717 || (INTEGRAL_TYPE_P (rhs_type
)
3718 && (TYPE_PRECISION (rhs_type
)
3719 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
))))))
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3723 "type conversion to/from bit-precision unsupported."
3728 /* Check the operands of the operation. */
3729 if (!vect_is_simple_use (op0
, vinfo
, &def_stmt
, &dt
[0], &vectype_in
))
3731 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3733 "use not simple.\n");
3736 if (op_type
== binary_op
)
3740 op1
= gimple_assign_rhs2 (stmt
);
3741 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3742 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3744 if (CONSTANT_CLASS_P (op0
))
3745 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1], &vectype_in
);
3747 ok
= vect_is_simple_use (op1
, vinfo
, &def_stmt
, &dt
[1]);
3751 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3753 "use not simple.\n");
3758 /* If op0 is an external or constant defs use a vector type of
3759 the same size as the output vector type. */
3761 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3763 gcc_assert (vectype_in
);
3766 if (dump_enabled_p ())
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3769 "no vectype for scalar type ");
3770 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3771 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3777 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3778 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3780 if (dump_enabled_p ())
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3783 "can't convert between boolean and non "
3785 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3786 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3792 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3793 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3794 if (nunits_in
< nunits_out
)
3796 else if (nunits_out
== nunits_in
)
3801 /* Multiple types in SLP are handled by creating the appropriate number of
3802 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3804 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3806 else if (modifier
== NARROW
)
3807 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3809 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3811 /* Sanity check: make sure that at least one copy of the vectorized stmt
3812 needs to be generated. */
3813 gcc_assert (ncopies
>= 1);
3815 /* Supportable by target? */
3819 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3821 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3828 "conversion not supported by target.\n");
3832 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3833 &code1
, &code2
, &multi_step_cvt
,
3836 /* Binary widening operation can only be supported directly by the
3838 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3842 if (code
!= FLOAT_EXPR
3843 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3844 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3847 rhs_mode
= TYPE_MODE (rhs_type
);
3848 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3849 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3850 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3851 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3855 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3856 if (cvt_type
== NULL_TREE
)
3859 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3861 if (!supportable_convert_operation (code
, vectype_out
,
3862 cvt_type
, &decl1
, &codecvt1
))
3865 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3866 cvt_type
, &codecvt1
,
3867 &codecvt2
, &multi_step_cvt
,
3871 gcc_assert (multi_step_cvt
== 0);
3873 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3874 vectype_in
, &code1
, &code2
,
3875 &multi_step_cvt
, &interm_types
))
3879 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3882 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3883 codecvt2
= ERROR_MARK
;
3887 interm_types
.safe_push (cvt_type
);
3888 cvt_type
= NULL_TREE
;
3893 gcc_assert (op_type
== unary_op
);
3894 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3895 &code1
, &multi_step_cvt
,
3899 if (code
!= FIX_TRUNC_EXPR
3900 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3901 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3904 rhs_mode
= TYPE_MODE (rhs_type
);
3906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3907 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3908 if (cvt_type
== NULL_TREE
)
3910 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3913 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3914 &code1
, &multi_step_cvt
,
3923 if (!vec_stmt
) /* transformation not required. */
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_NOTE
, vect_location
,
3927 "=== vectorizable_conversion ===\n");
3928 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3930 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3931 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3933 else if (modifier
== NARROW
)
3935 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3936 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3940 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3941 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3943 interm_types
.release ();
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE
, vect_location
,
3950 "transform conversion. ncopies = %d.\n", ncopies
);
3952 if (op_type
== binary_op
)
3954 if (CONSTANT_CLASS_P (op0
))
3955 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3956 else if (CONSTANT_CLASS_P (op1
))
3957 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3960 /* In case of multi-step conversion, we first generate conversion operations
3961 to the intermediate types, and then from that types to the final one.
3962 We create vector destinations for the intermediate type (TYPES) received
3963 from supportable_*_operation, and store them in the correct order
3964 for future use in vect_create_vectorized_*_stmts (). */
3965 vec_dsts
.create (multi_step_cvt
+ 1);
3966 vec_dest
= vect_create_destination_var (scalar_dest
,
3967 (cvt_type
&& modifier
== WIDEN
)
3968 ? cvt_type
: vectype_out
);
3969 vec_dsts
.quick_push (vec_dest
);
3973 for (i
= interm_types
.length () - 1;
3974 interm_types.iterate (i, &intermediate_type); i--)
3976 vec_dest = vect_create_destination_var (scalar_dest,
3978 vec_dsts.quick_push (vec_dest);
3983 vec_dest = vect_create_destination_var (scalar_dest,
3985 ? vectype_out : cvt_type);
3989 if (modifier == WIDEN)
3991 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3992 if (op_type == binary_op)
3993 vec_oprnds1.create (1);
3995 else if (modifier == NARROW)
3996 vec_oprnds0.create (
3997 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3999 else if (code == WIDEN_LSHIFT_EXPR)
4000 vec_oprnds1.create (slp_node->vec_stmts_size);
4003 prev_stmt_info = NULL;
4007 for (j = 0; j < ncopies; j++)
4010 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4013 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4015 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4017 /* Arguments are ready, create the new vector stmt. */
4018 if (code1 == CALL_EXPR)
4020 new_stmt = gimple_build_call (decl1, 1, vop0);
4021 new_temp = make_ssa_name (vec_dest, new_stmt);
4022 gimple_call_set_lhs (new_stmt, new_temp);
4026 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4027 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4028 new_temp = make_ssa_name (vec_dest, new_stmt);
4029 gimple_assign_set_lhs (new_stmt, new_temp);
4032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4034 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4037 if (!prev_stmt_info)
4038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4041 prev_stmt_info = vinfo_for_stmt (new_stmt);
4048 /* In case the vectorization factor (VF) is bigger than the number
4049 of elements that we can fit in a vectype (nunits), we have to
4050 generate more than one vector stmt - i.e - we need to "unroll"
4051 the vector stmt by a factor VF/nunits. */
4052 for (j = 0; j < ncopies; j++)
4059 if (code == WIDEN_LSHIFT_EXPR)
4064 /* Store vec_oprnd1 for every vector stmt to be created
4065 for SLP_NODE. We check during the analysis that all
4066 the shift arguments are the same. */
4067 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4068 vec_oprnds1.quick_push (vec_oprnd1);
4070 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4074 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4075 &vec_oprnds1, slp_node, -1);
4079 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4080 vec_oprnds0.quick_push (vec_oprnd0);
4081 if (op_type == binary_op)
4083 if (code == WIDEN_LSHIFT_EXPR)
4086 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4087 vec_oprnds1.quick_push (vec_oprnd1);
4093 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4094 vec_oprnds0.truncate (0);
4095 vec_oprnds0.quick_push (vec_oprnd0);
4096 if (op_type == binary_op)
4098 if (code == WIDEN_LSHIFT_EXPR)
4101 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4103 vec_oprnds1.truncate (0);
4104 vec_oprnds1.quick_push (vec_oprnd1);
4108 /* Arguments are ready. Create the new vector stmts. */
4109 for (i = multi_step_cvt; i >= 0; i--)
4111 tree this_dest = vec_dsts[i];
4112 enum tree_code c1 = code1, c2 = code2;
4113 if (i == 0 && codecvt2 != ERROR_MARK)
4118 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4120 stmt, this_dest, gsi,
4121 c1, c2, decl1, decl2,
4125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4129 if (codecvt1 == CALL_EXPR)
4131 new_stmt = gimple_build_call (decl1, 1, vop0);
4132 new_temp = make_ssa_name (vec_dest, new_stmt);
4133 gimple_call_set_lhs (new_stmt, new_temp);
4137 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4138 new_temp = make_ssa_name (vec_dest);
4139 new_stmt = gimple_build_assign (new_temp, codecvt1,
4143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4146 new_stmt = SSA_NAME_DEF_STMT (vop0);
4149 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4152 if (!prev_stmt_info)
4153 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4156 prev_stmt_info = vinfo_for_stmt (new_stmt);
4161 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4165 /* In case the vectorization factor (VF) is bigger than the number
4166 of elements that we can fit in a vectype (nunits), we have to
4167 generate more than one vector stmt - i.e - we need to "unroll"
4168 the vector stmt by a factor VF/nunits. */
4169 for (j = 0; j < ncopies; j++)
4173 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4177 vec_oprnds0.truncate (0);
4178 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4179 vect_pow2 (multi_step_cvt) - 1);
4182 /* Arguments are ready. Create the new vector stmts. */
4184 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4186 if (codecvt1 == CALL_EXPR)
4188 new_stmt = gimple_build_call (decl1, 1, vop0);
4189 new_temp = make_ssa_name (vec_dest, new_stmt);
4190 gimple_call_set_lhs (new_stmt, new_temp);
4194 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4195 new_temp = make_ssa_name (vec_dest);
4196 new_stmt = gimple_build_assign (new_temp, codecvt1,
4200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4201 vec_oprnds0[i] = new_temp;
4204 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4205 stmt, vec_dsts, gsi,
4210 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4214 vec_oprnds0.release ();
4215 vec_oprnds1.release ();
4216 vec_dsts.release ();
4217 interm_types.release ();
4223 /* Function vectorizable_assignment.
4225 Check if STMT performs an assignment (copy) that can be vectorized.
4226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4227 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
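/* Illustrative sketch (not part of the original tree-vect-stmts.c): a scalar
   loop of the kind this function handles.  Each iteration is a plain copy,
   possibly through a conversion that does not change the bit pattern (which
   the vectorizer may represent as a VIEW_CONVERT_EXPR); the function and
   variable names below are hypothetical.  */
#if 0
#include <string.h>

void
copy_as_float (float *dst, const int *src, int n)
{
  for (int i = 0; i < n; i++)
    {
      float f;
      memcpy (&f, &src[i], sizeof f);	/* bit-preserving copy of one lane */
      dst[i] = f;
    }
}
#endif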
4231 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4232 gimple **vec_stmt, slp_tree slp_node)
4237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4241 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4244 vec<tree> vec_oprnds = vNULL;
4246 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4247 vec_info *vinfo = stmt_info->vinfo;
4248 gimple *new_stmt = NULL;
4249 stmt_vec_info prev_stmt_info = NULL;
4250 enum tree_code code;
4253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4260 /* Is vectorizable assignment? */
4261 if (!is_gimple_assign (stmt))
4264 scalar_dest = gimple_assign_lhs (stmt);
4265 if (TREE_CODE (scalar_dest) != SSA_NAME)
4268 code = gimple_assign_rhs_code (stmt);
4269 if (gimple_assign_single_p (stmt)
4270 || code == PAREN_EXPR
4271 || CONVERT_EXPR_CODE_P (code))
4272 op = gimple_assign_rhs1 (stmt);
4276 if (code == VIEW_CONVERT_EXPR)
4277 op = TREE_OPERAND (op, 0);
4279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4280 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4282 /* Multiple types in SLP are handled by creating the appropriate number of
4283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4285 if (slp_node || PURE_SLP_STMT (stmt_info))
4288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4290 gcc_assert (ncopies >= 1);
4292 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4300 /* We can handle NOP_EXPR conversions that do not change the number
4301 of elements or the vector size. */
4302 if ((CONVERT_EXPR_CODE_P (code)
4303 || code == VIEW_CONVERT_EXPR)
4305 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4306 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4307 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4310 /* We do not handle bit-precision changes. */
4311 if ((CONVERT_EXPR_CODE_P (code)
4312 || code == VIEW_CONVERT_EXPR)
4313 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4314 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4315 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4316 || ((TYPE_PRECISION (TREE_TYPE (op))
4317 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4318 /* But a conversion that does not change the bit-pattern is ok. */
4319 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4320 > TYPE_PRECISION (TREE_TYPE (op)))
4321 && TYPE_UNSIGNED (TREE_TYPE (op)))
4322 /* Conversion between boolean types of different sizes is
4323 a simple assignment in case their vectypes are same
4325 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4326 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "type conversion to/from bit-precision "
4335 if (!vec_stmt) /* transformation not required. */
4337 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE, vect_location,
4340 "=== vectorizable_assignment ===\n");
4341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4350 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4353 for (j = 0; j < ncopies; j++)
4357 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4359 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4361 /* Arguments are ready. create the new vector stmt. */
4362 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4364 if (CONVERT_EXPR_CODE_P (code)
4365 || code == VIEW_CONVERT_EXPR)
4366 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4367 new_stmt = gimple_build_assign (vec_dest, vop);
4368 new_temp = make_ssa_name (vec_dest, new_stmt);
4369 gimple_assign_set_lhs (new_stmt, new_temp);
4370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4379 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4383 prev_stmt_info = vinfo_for_stmt (new_stmt);
4386 vec_oprnds.release ();
4391 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4392 either as shift by a scalar or by a vector. */
4395 vect_supportable_shift (enum tree_code code, tree scalar_type)
4398 machine_mode vec_mode;
4403 vectype = get_vectype_for_scalar_type (scalar_type);
4407 optab = optab_for_tree_code (code, vectype, optab_scalar);
4409 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4411 optab = optab_for_tree_code (code, vectype, optab_vector);
4413 || (optab_handler (optab, TYPE_MODE (vectype))
4414 == CODE_FOR_nothing))
4418 vec_mode = TYPE_MODE (vectype);
4419 icode = (int) optab_handler (optab, vec_mode);
4420 if (icode == CODE_FOR_nothing)
4427 /* Function vectorizable_shift.
4429 Check if STMT performs a shift operation that can be vectorized.
4430 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4431 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4432 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
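/* Illustrative sketch (not part of the original tree-vect-stmts.c): two shift
   loops that exercise the cases analysed below.  In the first the shift
   amount is loop-invariant, so a vector-shifted-by-scalar optab
   (optab_scalar) can be used; in the second the amount varies per element,
   so a vector-shifted-by-vector optab (optab_vector) is needed.  Function
   names are hypothetical.  */
#if 0
void
shift_by_scalar (unsigned int *a, int n, int s)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s;			/* invariant shift amount */
}

void
shift_by_vector (unsigned int *a, const int *s, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s[i];		/* per-element shift amount */
}
#endif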
4435 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4436 gimple **vec_stmt, slp_tree slp_node)
4440 tree op0, op1 = NULL;
4441 tree vec_oprnd1 = NULL_TREE;
4442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4444 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4445 enum tree_code code;
4446 machine_mode vec_mode;
4450 machine_mode optab_op2_mode;
4452 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4453 gimple *new_stmt = NULL;
4454 stmt_vec_info prev_stmt_info;
4461 vec<tree> vec_oprnds0 = vNULL;
4462 vec<tree> vec_oprnds1 = vNULL;
4465 bool scalar_shift_arg = true;
4466 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4467 vec_info *vinfo = stmt_info->vinfo;
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4477 /* Is STMT a vectorizable binary/unary operation? */
4478 if (!is_gimple_assign (stmt))
4481 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4484 code = gimple_assign_rhs_code (stmt);
4486 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4487 || code == RROTATE_EXPR))
4490 scalar_dest = gimple_assign_lhs (stmt);
4491 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4492 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4493 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "bit-precision shifts not supported.\n");
4501 op0 = gimple_assign_rhs1 (stmt);
4502 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4506 "use not simple.\n");
4509 /* If op0 is an external or constant def use a vector type with
4510 the same size as the output vector type. */
4512 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4514 gcc_assert (vectype);
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4519 "no vectype for scalar type\n");
4523 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4524 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4525 if (nunits_out != nunits_in)
4528 op1 = gimple_assign_rhs2 (stmt);
4529 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4533 "use not simple.\n");
4538 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4542 /* Multiple types in SLP are handled by creating the appropriate number of
4543 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4545 if (slp_node || PURE_SLP_STMT (stmt_info))
4548 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4550 gcc_assert (ncopies >= 1);
4552 /* Determine whether the shift amount is a vector, or scalar. If the
4553 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4555 if ((dt[1] == vect_internal_def
4556 || dt[1] == vect_induction_def)
4558 scalar_shift_arg = false;
4559 else if (dt[1] == vect_constant_def
4560 || dt[1] == vect_external_def
4561 || dt[1] == vect_internal_def)
4563 /* In SLP, need to check whether the shift count is the same,
4564 in loops if it is a constant or invariant, it is always
4568 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4571 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4572 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4573 scalar_shift_arg = false;
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "operand mode requires invariant argument.\n");
4584 /* Vector shifted by vector. */
4585 if (!scalar_shift_arg)
4587 optab = optab_for_tree_code (code, vectype, optab_vector);
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE, vect_location,
4590 "vector/vector shift/rotate found.\n");
4593 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4594 if (op1_vectype == NULL_TREE
4595 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "unusable type for last operand in"
4600 " vector/vector shift/rotate.\n");
4604 /* See if the machine has a vector shifted by scalar insn and if not
4605 then see if it has a vector shifted by vector insn. */
4608 optab = optab_for_tree_code (code, vectype, optab_scalar);
4610 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_NOTE, vect_location,
4614 "vector/scalar shift/rotate found.\n");
4618 optab = optab_for_tree_code (code, vectype, optab_vector);
4620 && (optab_handler (optab, TYPE_MODE (vectype))
4621 != CODE_FOR_nothing))
4623 scalar_shift_arg = false;
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_NOTE, vect_location,
4627 "vector/vector shift/rotate found.\n");
4629 /* Unlike the other binary operators, shifts/rotates have
4630 the rhs being int, instead of the same type as the lhs,
4631 so make sure the scalar is the right type if we are
4632 dealing with vectors of long long/long/short/char. */
4633 if (dt[1] == vect_constant_def)
4634 op1 = fold_convert (TREE_TYPE (vectype), op1);
4635 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4639 && TYPE_MODE (TREE_TYPE (vectype))
4640 != TYPE_MODE (TREE_TYPE (op1)))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "unusable type for last operand in"
4645 " vector/vector shift/rotate.\n");
4648 if (vec_stmt && !slp_node)
4650 op1 = fold_convert (TREE_TYPE (vectype), op1);
4651 op1 = vect_init_vector (stmt, op1,
4652 TREE_TYPE (vectype), NULL);
4659 /* Supportable by target? */
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4667 vec_mode = TYPE_MODE (vectype);
4668 icode = (int) optab_handler (optab, vec_mode);
4669 if (icode == CODE_FOR_nothing)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "op not supported by target.\n");
4674 /* Check only during analysis. */
4675 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4676 || (vf < vect_min_worthwhile_factor (code)
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "proceeding using word mode.\n");
4684 /* Worthwhile without SIMD support? Check only during analysis. */
4685 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4686 && vf < vect_min_worthwhile_factor (code)
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4691 "not worthwhile without SIMD support.\n");
4695 if (!vec_stmt) /* transformation not required. */
4697 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_NOTE, vect_location,
4700 "=== vectorizable_shift ===\n");
4701 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_NOTE, vect_location,
4709 "transform binary/unary operation.\n");
4712 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4714 prev_stmt_info = NULL;
4715 for (j = 0; j < ncopies; j++)
4720 if (scalar_shift_arg)
4722 /* Vector shl and shr insn patterns can be defined with scalar
4723 operand 2 (shift operand). In this case, use constant or loop
4724 invariant op1 directly, without extending it to vector mode
4726 optab_op2_mode = insn_data[icode].operand[2].mode;
4727 if (!VECTOR_MODE_P (optab_op2_mode))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE, vect_location,
4731 "operand 1 using scalar mode.\n");
4733 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4734 vec_oprnds1.quick_push (vec_oprnd1);
4737 /* Store vec_oprnd1 for every vector stmt to be created
4738 for SLP_NODE. We check during the analysis that all
4739 the shift arguments are the same.
4740 TODO: Allow different constants for different vector
4741 stmts generated for an SLP instance. */
4742 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4743 vec_oprnds1.quick_push (vec_oprnd1);
4748 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4749 (a special case for certain kind of vector shifts); otherwise,
4750 operand 1 should be of a vector type (the usual case). */
4752 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4759 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4761 /* Arguments are ready. Create the new vector stmt. */
4762 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4764 vop1 = vec_oprnds1[i];
4765 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 gimple_assign_set_lhs (new_stmt, new_temp);
4768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4770 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4780 prev_stmt_info = vinfo_for_stmt (new_stmt);
4783 vec_oprnds0.release ();
4784 vec_oprnds1.release ();
4790 /* Function vectorizable_operation.
4792 Check if STMT performs a binary, unary or ternary operation that can
4794 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4795 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4796 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4799 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4800 gimple **vec_stmt, slp_tree slp_node)
4804 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4807 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4808 enum tree_code code;
4809 machine_mode vec_mode;
4813 bool target_support_p;
4815 enum vect_def_type dt[3]
4816 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4817 gimple *new_stmt = NULL;
4818 stmt_vec_info prev_stmt_info;
4824 vec<tree> vec_oprnds0 = vNULL;
4825 vec<tree> vec_oprnds1 = vNULL;
4826 vec<tree> vec_oprnds2 = vNULL;
4827 tree vop0, vop1, vop2;
4828 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4829 vec_info *vinfo = stmt_info->vinfo;
4832 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4835 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4839 /* Is STMT a vectorizable binary/unary operation? */
4840 if (!is_gimple_assign (stmt))
4843 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4846 code = gimple_assign_rhs_code (stmt);
4848 /* For pointer addition, we should use the normal plus for
4849 the vector addition. */
4850 if (code == POINTER_PLUS_EXPR)
4853 /* Support only unary or binary operations. */
4854 op_type = TREE_CODE_LENGTH (code);
4855 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "num. args = %d (not unary/binary/ternary op).\n",
4864 scalar_dest = gimple_assign_lhs (stmt);
4865 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4867 /* Most operations cannot handle bit-precision types without extra
4869 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4871 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4872 /* Exception are bitwise binary operations. */
4873 && code != BIT_IOR_EXPR
4874 && code != BIT_XOR_EXPR
4875 && code != BIT_AND_EXPR)
4877 if (dump_enabled_p ())
4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4879 "bit-precision arithmetic not supported.\n");
4883 op0 = gimple_assign_rhs1 (stmt);
4884 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4886 if (dump_enabled_p ())
4887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4888 "use not simple.\n");
4891 /* If op0 is an external or constant def use a vector type with
4892 the same size as the output vector type. */
4895 /* For boolean type we cannot determine vectype by
4896 invariant value (don't know whether it is a vector
4897 of booleans or vector of integers). We use output
4898 vectype because operations on boolean don't change
4900 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4902 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4904 if (dump_enabled_p ())
4905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4906 "not supported operation on bool value.\n");
4909 vectype = vectype_out;
4912 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4915 gcc_assert (vectype);
4918 if (dump_enabled_p ())
4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4921 "no vectype for scalar type ");
4922 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4924 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4930 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4931 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4932 if (nunits_out != nunits_in)
4935 if (op_type == binary_op || op_type == ternary_op)
4937 op1 = gimple_assign_rhs2 (stmt);
4938 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4940 if (dump_enabled_p ())
4941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4942 "use not simple.\n");
4946 if (op_type == ternary_op)
4948 op2 = gimple_assign_rhs3 (stmt);
4949 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4951 if (dump_enabled_p ())
4952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4953 "use not simple.\n");
4959 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4963 /* Multiple types in SLP are handled by creating the appropriate number of
4964 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4966 if (slp_node || PURE_SLP_STMT (stmt_info))
4969 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4971 gcc_assert (ncopies >= 1);
4973 /* Shifts are handled in vectorizable_shift (). */
4974 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4975 || code == RROTATE_EXPR)
4978 /* Supportable by target? */
4980 vec_mode = TYPE_MODE (vectype);
4981 if (code == MULT_HIGHPART_EXPR)
4982 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4985 optab = optab_for_tree_code (code, vectype, optab_default);
4988 if (dump_enabled_p ())
4989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4993 target_support_p = (optab_handler (optab, vec_mode)
4994 != CODE_FOR_nothing);
4997 if (!target_support_p)
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5001 "op not supported by target.\n");
5002 /* Check only during analysis. */
5003 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5004 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "proceeding using word mode.\n");
5011 /* Worthwhile without SIMD support? Check only during analysis. */
5012 if (!VECTOR_MODE_P (vec_mode)
5014 && vf < vect_min_worthwhile_factor (code))
5016 if (dump_enabled_p ())
5017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5018 "not worthwhile without SIMD support.\n");
5022 if (!vec_stmt) /* transformation not required. */
5024 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5025 if (dump_enabled_p ())
5026 dump_printf_loc (MSG_NOTE, vect_location,
5027 "=== vectorizable_operation ===\n");
5028 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5034 if (dump_enabled_p ())
5035 dump_printf_loc (MSG_NOTE, vect_location,
5036 "transform binary/unary operation.\n");
5039 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5041 /* In case the vectorization factor (VF) is bigger than the number
5042 of elements that we can fit in a vectype (nunits), we have to generate
5043 more than one vector stmt - i.e - we need to "unroll" the
5044 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5045 from one copy of the vector stmt to the next, in the field
5046 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5047 stages to find the correct vector defs to be used when vectorizing
5048 stmts that use the defs of the current stmt. The example below
5049 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5050 we need to create 4 vectorized stmts):
5052 before vectorization:
5053 RELATED_STMT VEC_STMT
5057 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5059 RELATED_STMT VEC_STMT
5060 VS1_0: vx0 = memref0 VS1_1 -
5061 VS1_1: vx1 = memref1 VS1_2 -
5062 VS1_2: vx2 = memref2 VS1_3 -
5063 VS1_3: vx3 = memref3 - -
5064 S1: x = load - VS1_0
5067 step2: vectorize stmt S2 (done here):
5068 To vectorize stmt S2 we first need to find the relevant vector
5069 def for the first operand 'x'. This is, as usual, obtained from
5070 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5071 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5072 relevant vector def 'vx0'. Having found 'vx0' we can generate
5073 the vector stmt VS2_0, and as usual, record it in the
5074 STMT_VINFO_VEC_STMT of stmt S2.
5075 When creating the second copy (VS2_1), we obtain the relevant vector
5076 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5077 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5078 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5079 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5080 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5081 chain of stmts and pointers:
5082 RELATED_STMT VEC_STMT
5083 VS1_0: vx0 = memref0 VS1_1 -
5084 VS1_1: vx1 = memref1 VS1_2 -
5085 VS1_2: vx2 = memref2 VS1_3 -
5086 VS1_3: vx3 = memref3 - -
5087 S1: x = load - VS1_0
5088 VS2_0: vz0 = vx0 + v1 VS2_1 -
5089 VS2_1: vz1 = vx1 + v1 VS2_2 -
5090 VS2_2: vz2 = vx2 + v1 VS2_3 -
5091 VS2_3: vz3 = vx3 + v1 - -
5092 S2: z = x + 1 - VS2_0 */
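/* Illustrative sketch (not part of the original tree-vect-stmts.c): with
   VF == 16 and a 4-element vectype, the single scalar addition S2 above is
   emitted four times per vectorized iteration, each copy consuming the
   vector def produced by the corresponding copy of the load.  Written with
   the GNU vector extension as a stand-in for the generated GIMPLE (names
   are hypothetical):  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

void
add_one_vf16 (const int *x, int *z)
{
  /* One vectorized iteration covering 16 scalar elements.  */
  v4si vx0 = *(const v4si *) &x[0], vx1 = *(const v4si *) &x[4];
  v4si vx2 = *(const v4si *) &x[8], vx3 = *(const v4si *) &x[12];
  v4si v1 = { 1, 1, 1, 1 };
  *(v4si *) &z[0] = vx0 + v1;	/* VS2_0 */
  *(v4si *) &z[4] = vx1 + v1;	/* VS2_1 */
  *(v4si *) &z[8] = vx2 + v1;	/* VS2_2 */
  *(v4si *) &z[12] = vx3 + v1;	/* VS2_3 */
}
#endif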
5094 prev_stmt_info = NULL;
5095 for (j = 0; j < ncopies; j++)
5100 if (op_type == binary_op || op_type == ternary_op)
5101 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5104 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5106 if (op_type == ternary_op)
5108 vec_oprnds2.create (1);
5109 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5115 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5116 if (op_type == ternary_op)
5118 tree vec_oprnd = vec_oprnds2.pop ();
5119 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5124 /* Arguments are ready. Create the new vector stmt. */
5125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5127 vop1 = ((op_type == binary_op || op_type == ternary_op)
5128 ? vec_oprnds1[i] : NULL_TREE);
5129 vop2 = ((op_type == ternary_op)
5130 ? vec_oprnds2[i] : NULL_TREE);
5131 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5132 new_temp = make_ssa_name (vec_dest, new_stmt);
5133 gimple_assign_set_lhs (new_stmt, new_temp);
5134 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5143 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5145 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5146 prev_stmt_info = vinfo_for_stmt (new_stmt);
5149 vec_oprnds0.release ();
5150 vec_oprnds1.release ();
5151 vec_oprnds2.release ();
5156 /* A helper function to ensure data reference DR's base alignment
5160 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5165 if (DR_VECT_AUX (dr)->base_misaligned)
5167 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5168 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5170 if (decl_in_symtab_p (base_decl))
5171 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5174 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5175 DECL_USER_ALIGN (base_decl) = 1;
5177 DR_VECT_AUX (dr)->base_misaligned = false;
5182 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5183 reversal of the vector elements. If that is impossible to do,
5187 perm_mask_for_reverse (tree vectype)
5192 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5193 sel = XALLOCAVEC (unsigned char, nunits);
5195 for (i = 0; i < nunits; ++i)
5196 sel[i] = nunits - 1 - i;
5198 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5200 return vect_gen_perm_mask_checked (vectype, sel);
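/* Illustrative sketch (not part of the original tree-vect-stmts.c): for a
   4-element vector the selector built above is {3, 2, 1, 0}, i.e. element i
   of the result comes from element nunits - 1 - i of the input.  The same
   permutation written with the GNU vector extension:  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

v4si
reverse_v4si (v4si x)
{
  v4si sel = { 3, 2, 1, 0 };	/* sel[i] = nunits - 1 - i */
  return __builtin_shuffle (x, sel);
}
#endif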
5203 /* Function vectorizable_store.
5205 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5207 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5208 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5209 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5212 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5218 tree vec_oprnd = NULL_TREE;
5219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5220 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5223 struct loop *loop = NULL;
5224 machine_mode vec_mode;
5226 enum dr_alignment_support alignment_support_scheme;
5228 enum vect_def_type dt;
5229 stmt_vec_info prev_stmt_info = NULL;
5230 tree dataref_ptr = NULL_TREE;
5231 tree dataref_offset = NULL_TREE;
5232 gimple *ptr_incr = NULL;
5235 gimple *next_stmt, *first_stmt = NULL;
5236 bool grouped_store = false;
5237 bool store_lanes_p = false;
5238 unsigned int group_size, i;
5239 vec<tree> dr_chain = vNULL;
5240 vec<tree> oprnds = vNULL;
5241 vec<tree> result_chain = vNULL;
5243 bool negative = false;
5244 tree offset = NULL_TREE;
5245 vec<tree> vec_oprnds = vNULL;
5246 bool slp = (slp_node != NULL);
5247 unsigned int vec_num;
5248 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5249 vec_info *vinfo = stmt_info->vinfo;
5251 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5252 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5253 int scatter_scale = 1;
5254 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5255 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5258 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5261 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5265 /* Is vectorizable store? */
5267 if (!is_gimple_assign (stmt))
5270 scalar_dest = gimple_assign_lhs (stmt);
5271 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5272 && is_pattern_stmt_p (stmt_info))
5273 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5274 if (TREE_CODE (scalar_dest) != ARRAY_REF
5275 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5276 && TREE_CODE (scalar_dest) != INDIRECT_REF
5277 && TREE_CODE (scalar_dest) != COMPONENT_REF
5278 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5279 && TREE_CODE (scalar_dest) != REALPART_EXPR
5280 && TREE_CODE (scalar_dest) != MEM_REF)
5283 gcc_assert (gimple_assign_single_p (stmt));
5285 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5286 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5289 loop = LOOP_VINFO_LOOP (loop_vinfo);
5291 /* Multiple types in SLP are handled by creating the appropriate number of
5292 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5294 if (slp || PURE_SLP_STMT (stmt_info))
5297 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5299 gcc_assert (ncopies >= 1);
5301 /* FORNOW. This restriction should be relaxed. */
5302 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "multiple types in nested loop.\n");
5310 op = gimple_assign_rhs1 (stmt);
5312 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5314 if (dump_enabled_p ())
5315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5316 "use not simple.\n");
5320 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5323 elem_type = TREE_TYPE (vectype);
5324 vec_mode = TYPE_MODE (vectype);
5326 /* FORNOW. In some cases can vectorize even if data-type not supported
5327 (e.g. - array initialization with 0). */
5328 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5331 if (!STMT_VINFO_DATA_REF (stmt_info))
5334 if (!STMT_VINFO_STRIDED_P (stmt_info))
5337 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5338 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5339 size_zero_node) < 0;
5340 if (negative && ncopies > 1)
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "multiple types with negative step.\n");
5349 gcc_assert (!grouped_store);
5350 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5351 if (alignment_support_scheme != dr_aligned
5352 && alignment_support_scheme != dr_unaligned_supported)
5354 if (dump_enabled_p ())
5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356 "negative step but alignment required.\n");
5359 if (dt != vect_constant_def
5360 && dt != vect_external_def
5361 && !perm_mask_for_reverse (vectype))
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5365 "negative step and reversing not supported.\n");
5371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5373 grouped_store = true;
5374 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5375 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5377 && !PURE_SLP_STMT (stmt_info)
5378 && !STMT_VINFO_STRIDED_P (stmt_info))
5380 if (vect_store_lanes_supported (vectype, group_size))
5381 store_lanes_p = true;
5382 else if (!vect_grouped_store_supported (vectype, group_size))
5386 if (STMT_VINFO_STRIDED_P (stmt_info)
5387 && (slp || PURE_SLP_STMT (stmt_info))
5388 && (group_size > nunits
5389 || nunits % group_size != 0))
5391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 "unhandled strided group store\n");
5396 if (first_stmt == stmt)
5398 /* STMT is the leader of the group. Check the operands of all the
5399 stmts of the group. */
5400 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5403 gcc_assert (gimple_assign_single_p (next_stmt));
5404 op = gimple_assign_rhs1 (next_stmt);
5405 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5407 if (dump_enabled_p ())
5408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5409 "use not simple.\n");
5412 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5417 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5420 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5421 &scatter_off, &scatter_scale);
5422 gcc_assert (scatter_decl);
5423 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5424 &scatter_off_vectype))
5426 if (dump_enabled_p ())
5427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428 "scatter index use not simple.");
5433 if (!vec_stmt) /* transformation not required. */
5435 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5436 /* The SLP costs are calculated during SLP analysis. */
5437 if (!PURE_SLP_STMT (stmt_info))
5438 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5445 ensure_base_align (stmt_info, dr);
5447 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5449 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5450 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5451 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5452 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5453 edge pe = loop_preheader_edge (loop);
5456 enum { NARROW, NONE, WIDEN } modifier;
5457 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5459 if (nunits == (unsigned int) scatter_off_nunits)
5461 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5463 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5466 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5467 sel[i] = i | nunits;
5469 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5470 gcc_assert (perm_mask != NULL_TREE);
5472 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5474 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5477 for (i = 0; i < (unsigned int) nunits; ++i)
5478 sel[i] = i | scatter_off_nunits;
5480 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5481 gcc_assert (perm_mask != NULL_TREE);
5487 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5488 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5489 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5490 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5491 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5492 scaletype = TREE_VALUE (arglist);
5494 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5495 && TREE_CODE (rettype) == VOID_TYPE);
5497 ptr = fold_convert (ptrtype, scatter_base);
5498 if (!is_gimple_min_invariant (ptr))
5500 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5501 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5502 gcc_assert (!new_bb);
5505 /* Currently we support only unconditional scatter stores,
5506 so mask should be all ones. */
5507 mask = build_int_cst (masktype, -1);
5508 mask = vect_init_vector (stmt, mask, masktype, NULL);
5510 scale = build_int_cst (scaletype, scatter_scale);
5512 prev_stmt_info = NULL;
5513 for (j = 0; j < ncopies; ++j)
5518 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5520 = vect_get_vec_def_for_operand (scatter_off, stmt);
5522 else if (modifier != NONE && (j & 1))
5524 if (modifier == WIDEN)
5527 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5528 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5531 else if (modifier == NARROW)
5533 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5536 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5544 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5546 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5549 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5551 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5552 == TYPE_VECTOR_SUBPARTS (srctype));
5553 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5554 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5555 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5560 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5562 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5563 == TYPE_VECTOR_SUBPARTS (idxtype));
5564 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5565 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5566 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5572 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5574 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5576 if (prev_stmt_info == NULL)
5577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5580 prev_stmt_info = vinfo_for_stmt (new_stmt);
5587 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5588 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5590 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5593 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5595 /* We vectorize all the stmts of the interleaving group when we
5596 reach the last stmt in the group. */
5597 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5598 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5607 grouped_store = false;
5608 /* VEC_NUM is the number of vect stmts to be created for this
5610 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5611 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5612 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5613 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5614 op = gimple_assign_rhs1 (first_stmt);
5617 /* VEC_NUM is the number of vect stmts to be created for this
5619 vec_num = group_size;
5625 group_size = vec_num = 1;
5628 if (dump_enabled_p ())
5629 dump_printf_loc (MSG_NOTE, vect_location,
5630 "transform store. ncopies = %d\n", ncopies);
5632 if (STMT_VINFO_STRIDED_P (stmt_info))
5634 gimple_stmt_iterator incr_gsi;
5640 gimple_seq stmts = NULL;
5641 tree stride_base, stride_step, alias_off;
5645 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5648 = fold_build_pointer_plus
5649 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5650 size_binop (PLUS_EXPR,
5651 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5652 convert_to_ptrofftype (DR_INIT (first_dr))));
5653 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5655 /* For a store with loop-invariant (but other than power-of-2)
5656 stride (i.e. not a grouped access) like so:
5658 for (i = 0; i < n; i += stride)
5661 we generate a new induction variable and new stores from
5662 the components of the (vectorized) rhs:
5664 for (j = 0; ; j += VF*stride)
5669 array[j + stride] = tmp2;
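/* Illustrative sketch (not part of the original tree-vect-stmts.c): the
   transformation described above, written out in plain C for VF == 4 under
   the assumption that the rhs is a contiguous load and alignment permits
   the vector access.  The vectorized rhs is computed once per vector
   iteration and its lanes are stored through stride-sized steps; the names
   below are hypothetical.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

void
strided_store_vf4 (int *array, const int *b, long n, long stride)
{
  long i = 0;
  for (; i + 4 <= n; i += 4)
    {
      v4si tmp = *(const v4si *) &b[i];	/* vectorized rhs */
      int *p = &array[i * stride];	/* running offset */
      p[0] = tmp[0];
      p[1 * stride] = tmp[1];
      p[2 * stride] = tmp[2];
      p[3 * stride] = tmp[3];
    }
  for (; i < n; i++)			/* scalar epilogue */
    array[i * stride] = b[i];
}
#endif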
5673 unsigned nstores = nunits;
5674 tree ltype = elem_type;
5677 nstores = nunits / group_size;
5678 if (group_size < nunits)
5679 ltype = build_vector_type (elem_type, group_size);
5682 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5683 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5687 ivstep = stride_step;
5688 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5689 build_int_cst (TREE_TYPE (ivstep),
5690 ncopies * nstores));
5692 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5694 create_iv (stride_base, ivstep, NULL,
5695 loop, &incr_gsi, insert_after,
5697 incr = gsi_stmt (incr_gsi);
5698 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5700 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5702 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5704 prev_stmt_info = NULL;
5705 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5706 next_stmt = first_stmt;
5707 for (g = 0; g < group_size; g++)
5709 running_off = offvar;
5712 tree size = TYPE_SIZE_UNIT (ltype);
5713 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5715 tree newoff = copy_ssa_name (running_off, NULL);
5716 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5718 vect_finish_stmt_generation (stmt, incr, gsi);
5719 running_off = newoff;
5721 for (j = 0; j < ncopies; j++)
5723 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5724 and first_stmt == stmt. */
5729 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5731 vec_oprnd = vec_oprnds[0];
5735 gcc_assert (gimple_assign_single_p (next_stmt));
5736 op = gimple_assign_rhs1 (next_stmt);
5737 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5743 vec_oprnd = vec_oprnds[j];
5746 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5747 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5751 for (i = 0; i < nstores; i++)
5753 tree newref, newoff;
5754 gimple *incr, *assign;
5755 tree size = TYPE_SIZE (ltype);
5756 /* Extract the i'th component. */
5757 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5758 bitsize_int (i), size);
5759 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5762 elem = force_gimple_operand_gsi (gsi, elem, true,
5766 newref = build2 (MEM_REF, ltype,
5767 running_off, alias_off);
5769 /* And store it to *running_off. */
5770 assign = gimple_build_assign (newref, elem);
5771 vect_finish_stmt_generation (stmt, assign, gsi);
5773 newoff = copy_ssa_name (running_off, NULL);
5774 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5775 running_off, stride_step);
5776 vect_finish_stmt_generation (stmt, incr, gsi);
5778 running_off = newoff;
5779 if (g == group_size - 1
5782 if (j == 0 && i == 0)
5783 STMT_VINFO_VEC_STMT (stmt_info)
5784 = *vec_stmt = assign;
5786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5787 prev_stmt_info = vinfo_for_stmt (assign);
5791 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5796 dr_chain.create (group_size);
5797 oprnds.create (group_size);
5799 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5800 gcc_assert (alignment_support_scheme);
5801 /* Targets with store-lane instructions must not require explicit
5803 gcc_assert (!store_lanes_p
5804 || alignment_support_scheme == dr_aligned
5805 || alignment_support_scheme == dr_unaligned_supported);
5808 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5811 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5813 aggr_type = vectype;
5815 /* In case the vectorization factor (VF) is bigger than the number
5816 of elements that we can fit in a vectype (nunits), we have to generate
5817 more than one vector stmt - i.e - we need to "unroll" the
5818 vector stmt by a factor VF/nunits. For more details see documentation in
5819 vect_get_vec_def_for_copy_stmt. */
5821 /* In case of interleaving (non-unit grouped access):
5828 We create vectorized stores starting from base address (the access of the
5829 first stmt in the chain (S2 in the above example), when the last store stmt
5830 of the chain (S4) is reached:
5833 VS2: &base + vec_size*1 = vx0
5834 VS3: &base + vec_size*2 = vx1
5835 VS4: &base + vec_size*3 = vx3
5837 Then permutation statements are generated:
5839 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5840 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5843 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5844 (the order of the data-refs in the output of vect_permute_store_chain
5845 corresponds to the order of scalar stmts in the interleaving chain - see
5846 the documentation of vect_permute_store_chain()).
5848 In case of both multiple types and interleaving, above vector stores and
5849 permutation stmts are created for every copy. The result vector stmts are
5850 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5851 STMT_VINFO_RELATED_STMT for the next copies.
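/* Illustrative sketch (not part of the original tree-vect-stmts.c): the two
   permutation statements VS5/VS6 above interleave the lanes of two
   8-element vectors.  The same selectors written with the GNU vector
   extension (lane indices 8..15 select from the second input):  */
#if 0
typedef short v8hi __attribute__ ((vector_size (16)));

void
interleave_v8hi (v8hi vx0, v8hi vx3, v8hi *lo, v8hi *hi)
{
  v8hi sel_lo = { 0, 8, 1, 9, 2, 10, 3, 11 };
  v8hi sel_hi = { 4, 12, 5, 13, 6, 14, 7, 15 };
  *lo = __builtin_shuffle (vx0, vx3, sel_lo);	/* VS5 */
  *hi = __builtin_shuffle (vx0, vx3, sel_hi);	/* VS6 */
}
#endif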
5854 prev_stmt_info = NULL;
5855 for (j = 0; j < ncopies; j++)
5862 /* Get vectorized arguments for SLP_NODE. */
5863 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5864 NULL, slp_node, -1);
5866 vec_oprnd = vec_oprnds[0];
5870 /* For interleaved stores we collect vectorized defs for all the
5871 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5872 used as an input to vect_permute_store_chain(), and OPRNDS as
5873 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5875 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5876 OPRNDS are of size 1. */
5877 next_stmt = first_stmt;
5878 for (i = 0; i < group_size; i++)
5880 /* Since gaps are not supported for interleaved stores,
5881 GROUP_SIZE is the exact number of stmts in the chain.
5882 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5883 there is no interleaving, GROUP_SIZE is 1, and only one
5884 iteration of the loop will be executed. */
5885 gcc_assert (next_stmt
5886 && gimple_assign_single_p (next_stmt));
5887 op = gimple_assign_rhs1 (next_stmt);
5889 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5890 dr_chain.quick_push (vec_oprnd);
5891 oprnds.quick_push (vec_oprnd);
5892 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5896 /* We should have caught mismatched types earlier. */
5897 gcc_assert (useless_type_conversion_p (vectype,
5898 TREE_TYPE (vec_oprnd)));
5899 bool simd_lane_access_p
5900 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5901 if (simd_lane_access_p
5902 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5903 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5904 && integer_zerop (DR_OFFSET (first_dr))
5905 && integer_zerop (DR_INIT (first_dr))
5906 && alias_sets_conflict_p (get_alias_set (aggr_type),
5907 get_alias_set (DR_REF (first_dr))))
5909 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5910 dataref_offset = build_int_cst (reference_alias_ptr_type
5911 (DR_REF (first_dr)), 0);
5916 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5917 simd_lane_access_p ? loop : NULL,
5918 offset, &dummy, gsi, &ptr_incr,
5919 simd_lane_access_p, &inv_p);
5920 gcc_assert (bb_vinfo || !inv_p);
5924 /* For interleaved stores we created vectorized defs for all the
5925 defs stored in OPRNDS in the previous iteration (previous copy).
5926 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5927 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5929 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5930 OPRNDS are of size 1. */
5931 for (i = 0; i < group_size; i++)
5934 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5935 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5936 dr_chain[i] = vec_oprnd;
5937 oprnds[i] = vec_oprnd;
5941 = int_const_binop (PLUS_EXPR, dataref_offset,
5942 TYPE_SIZE_UNIT (aggr_type));
5944 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5945 TYPE_SIZE_UNIT (aggr_type));
5952 /* Combine all the vectors into an array. */
5953 vec_array = create_vector_array (vectype, vec_num);
5954 for (i = 0; i < vec_num; i++)
5956 vec_oprnd = dr_chain[i];
5957 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5961 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5962 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5963 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5964 gimple_call_set_lhs (new_stmt, data_ref);
5965 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5973 result_chain.create (group_size);
5975 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5979 next_stmt = first_stmt;
5980 for (i = 0; i < vec_num; i++)
5982 unsigned align, misalign;
5985 /* Bump the vector pointer. */
5986 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5990 vec_oprnd = vec_oprnds[i];
5991 else if (grouped_store)
5992 /* For grouped stores vectorized defs are interleaved in
5993 vect_permute_store_chain(). */
5994 vec_oprnd = result_chain[i];
5996 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6000 : build_int_cst (reference_alias_ptr_type
6001 (DR_REF (first_dr)), 0));
6002 align = TYPE_ALIGN_UNIT (vectype);
6003 if (aligned_access_p (first_dr))
6005 else if (DR_MISALIGNMENT (first_dr) == -1)
6007 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6008 align = TYPE_ALIGN_UNIT (elem_type);
6010 align = get_object_alignment (DR_REF (first_dr))
6013 TREE_TYPE (data_ref)
6014 = build_aligned_type (TREE_TYPE (data_ref),
6015 align * BITS_PER_UNIT);
6019 TREE_TYPE (data_ref)
6020 = build_aligned_type (TREE_TYPE (data_ref),
6021 TYPE_ALIGN (elem_type));
6022 misalign = DR_MISALIGNMENT (first_dr);
6024 if (dataref_offset == NULL_TREE
6025 && TREE_CODE (dataref_ptr) == SSA_NAME)
6026 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6030 && dt != vect_constant_def
6031 && dt != vect_external_def)
6033 tree perm_mask = perm_mask_for_reverse (vectype);
6035 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6037 tree new_temp = make_ssa_name (perm_dest);
6039 /* Generate the permute statement. */
6041 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6042 vec_oprnd, perm_mask);
6043 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6045 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6046 vec_oprnd = new_temp;
6049 /* Arguments are ready. Create the new vector stmt. */
6050 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6056 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6067 prev_stmt_info = vinfo_for_stmt (new_stmt);
6071 dr_chain.release ();
6073 result_chain.release ();
6074 vec_oprnds.release ();
6079 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6080 VECTOR_CST mask. No checks are made that the target platform supports the
6081 mask, so callers may wish to test can_vec_perm_p separately, or use
6082 vect_gen_perm_mask_checked. */
6085 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6087 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6090 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6092 mask_elt_type = lang_hooks.types.type_for_mode
6093 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6094 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6096 mask_elts = XALLOCAVEC (tree, nunits);
6097 for (i = nunits - 1; i >= 0; i--)
6098 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6099 mask_vec = build_vector (mask_type, mask_elts);
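/* Illustrative sketch (not part of the original tree-vect-stmts.c): callers
   fill a byte-per-lane selector and hand it to one of the two helpers here;
   the resulting VECTOR_CST becomes the third operand of a VEC_PERM_EXPR.
   For two concatenated 4-element inputs, sel = {0, 2, 4, 6} extracts the
   even lanes, equivalent to:  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

v4si
extract_even (v4si a, v4si b)
{
  v4si sel = { 0, 2, 4, 6 };	/* indices 4..7 select from the second input */
  return __builtin_shuffle (a, b, sel);
}
#endif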
6104 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6105 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6108 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6110 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6111 return vect_gen_perm_mask_any (vectype, sel);
6114 /* Given a vector variable X and Y, that was generated for the scalar
6115 STMT, generate instructions to permute the vector elements of X and Y
6116 using permutation mask MASK_VEC, insert them at *GSI and return the
6117 permuted vector variable. */
6120 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6121 gimple_stmt_iterator *gsi)
6123 tree vectype = TREE_TYPE (x);
6124 tree perm_dest, data_ref;
6127 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6128 data_ref = make_ssa_name (perm_dest);
6130 /* Generate the permute statement. */
6131 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6132 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6137 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6138 inserting them on the loop's preheader edge. Returns true if we
6139 were successful in doing so (and thus STMT can be moved then),
6140 otherwise returns false. */
6143 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6149 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6151 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6152 if (!gimple_nop_p (def_stmt)
6153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6155 /* Make sure we don't need to recurse. While we could do
6156 so in simple cases when there are more complex use webs
6157 we don't have an easy way to preserve stmt order to fulfil
6158 dependencies within them. */
6161 if (gimple_code (def_stmt) == GIMPLE_PHI)
6163 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6165 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6166 if (!gimple_nop_p (def_stmt2)
6167 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6177 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6179 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6180 if (!gimple_nop_p (def_stmt)
6181 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6183 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6184 gsi_remove (&gsi, false);
6185 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6192 /* vectorizable_load.
6194 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
static bool
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple *new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j, group_size = -1, group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple *first_stmt;
  gimple *first_stmt_for_drptr = NULL;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def && !bb_vinfo)
    return false;

  /* Is vectorizable load? */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

      /* If this is single-element interleaving with an element distance
         that leaves unused vector loads around punt - we at least create
         very sub-optimal code in that case (and blow up memory).  */
      bool force_peeling = false;
      if (first_stmt == stmt
          && !GROUP_NEXT_ELEMENT (stmt_info))
        {
          if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "single-element interleaving not supported "
                                 "for not adjacent vector loads\n");
              return false;
            }

          /* Single-element interleaving requires peeling for gaps.  */
          force_peeling = true;
        }

      /* If there is a gap in the end of the group or the group size cannot
         be made a multiple of the vector element count then we access excess
         elements in the last iteration and thus need to peel that off.  */
      if (loop_vinfo
          && ! STMT_VINFO_STRIDED_P (stmt_info)
          && (force_peeling
              || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
              || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Data access with gaps requires scalar "
                             "epilogue loop\n");
          if (loop->inner)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }

          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
        }

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        slp_perm = true;

      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      if (!slp
          && !PURE_SLP_STMT (stmt_info)
          && !STMT_VINFO_STRIDED_P (stmt_info))
        {
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }

      /* Invalidate assumptions made by dependence analysis when vectorization
         on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
          && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
          && ((unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
              > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "cannot perform implicit CSE when performing "
                             "group loads with negative dependence distance\n");
          return false;
        }

      /* Similarly when the stmt is a load that is both part of a SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (vinfo_for_stmt
                                  (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      if ((grouped_load
           && (slp || PURE_SLP_STMT (stmt_info)))
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group load\n");
          return false;
        }
    }
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
                              NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
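
      /* Worked example for the two selectors built above (added for
         exposition, not in the original sources).  With
         gather_off_nunits == 4 and nunits == 2 (WIDEN), sel is
         { 2, 3, 2, 3 }: odd copies re-use the upper half of the same offset
         vector.  With nunits == 8 and gather_off_nunits == 4 (NARROW), sel
         is { 0, 1, 2, 3, 8, 9, 10, 11 }, the mask later handed to
         permute_vec_elements to combine the results of two gather calls
         into one vector of VECTYPE.  */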
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      if (slp && grouped_load)
        first_dr = STMT_VINFO_DATA_REF
            (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
      else
        first_dr = dr;

      stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             s = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
                            build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = force_gimple_operand (unshare_expr (stride_step),
                                          &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      int nloads = nunits;
      tree ltype = TREE_TYPE (vectype);
      auto_vec<tree> dr_chain;
      if (slp)
        {
          nloads = nunits / group_size;
          if (group_size < nunits)
            ltype = build_vector_type (TREE_TYPE (vectype), group_size);
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            {
              ncopies = (group_size * vf + nunits - 1) / nunits;
              dr_chain.create (ncopies);
            }
          else
            ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
      for (j = 0; j < ncopies; j++)
        {
          if (nloads > 1)
            {
              vec_alloc (v, nloads);
              for (i = 0; i < nloads; i++)
                {
                  tree newref, newoff;
                  gimple *incr;
                  newref = build2 (MEM_REF, ltype, running_off, alias_off);

                  newref = force_gimple_operand_gsi (gsi, newref, true,
                                                     NULL_TREE, true,
                                                     GSI_SAME_STMT);
                  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
                  newoff = copy_ssa_name (running_off);
                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                              running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);

                  running_off = newoff;
                }

              tree vec_inv = build_constructor (vectype, v);
              new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
              new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
          else
            {
              new_stmt = gimple_build_assign (make_ssa_name (ltype),
                                              build2 (MEM_REF, ltype,
                                                      running_off, alias_off));
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              tree newoff = copy_ssa_name (running_off);
              gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          if (slp)
            {
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
              else
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (slp_perm)
        vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                      slp_node_instance, false);
      return true;
    }
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
         without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
         the data ref pointer on.  */
      if (bb_vinfo)
        first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ???  But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ???  With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            vec_num = (group_size * vf + nunits - 1) / nunits;
          else
            vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          group_gap_adj = vf * group_size - nunits * vec_num;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
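
  /* Concrete illustration (added for exposition, V4SI, a group of two
     interleaved streams a and b): if vx0 = {a0,b0,a1,b1} and
     vx1 = {a2,b2,a3,b3}, then

       VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  ->  {a0,a1,a2,a3}
       VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  ->  {b0,b1,b2,b3}

     i.e. the even/odd selectors de-interleave the group back into one
     vector per scalar statement.  */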
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
         p = p + indx * vectype_size;
         vec_dest = *(p);
         indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
         p2 = p2 + indx * vectype_size
         lsq = *(floor(p2))
         vec_dest = realign_load (msq, lsq, realignment_token)
         indx = indx + 1;
         msq = lsq;
       }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else if (first_stmt_for_drptr
                   && first_stmt != first_stmt_for_drptr)
            {
              dataref_ptr
                = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
                                            at_loop, offset, &dummy, gsi,
                                            &ptr_incr, simd_lane_access_p,
                                            &inv_p, byte_offset);
              /* Adjust the pointer by the difference to first_stmt.  */
              data_reference_p ptrdr
                = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
              tree diff = fold_convert (sizetype,
                                        size_binop (MINUS_EXPR,
                                                    DR_INIT (first_dr),
                                                    DR_INIT (ptrdr)));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, diff);
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
                                          byte_offset);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = fold_build2 (MEM_REF, vectype, dataref_ptr,
                                     dataref_offset
                                     ? dataref_offset
                                     : build_int_cst (reference_alias_ptr_type
                                                        (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        if (DR_VECT_AUX (first_dr)->base_element_aligned)
                          align = TYPE_ALIGN_UNIT (elem_type);
                        else
                          align = (get_object_alignment (DR_REF (first_dr))
                                   / BITS_PER_UNIT);
                        misalign = 0;
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                align * BITS_PER_UNIT);
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE
                        && TREE_CODE (dataref_ptr) == SSA_NAME)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;

                    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    if (TREE_CODE (dataref_ptr) == SSA_NAME)
                      ptr = copy_ssa_name (dataref_ptr);
                    else
                      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
                    new_stmt = gimple_build_assign
                                 (ptr, BIT_AND_EXPR, dataref_ptr,
                                  build_int_cst
                                    (TREE_TYPE (dataref_ptr),
                                     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs,
                                       TYPE_SIZE_UNIT (elem_type));
                    bump = size_binop (MINUS_EXPR, bump, size_one_node);
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign
                                 (NULL_TREE, BIT_AND_EXPR, ptr,
                                  build_int_cst
                                    (TREE_TYPE (ptr),
                                     -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  if (TREE_CODE (dataref_ptr) == SSA_NAME)
                    new_temp = copy_ssa_name (dataref_ptr);
                  else
                    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
                  new_stmt = gimple_build_assign
                               (new_temp, BIT_AND_EXPR, dataref_ptr,
                                build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
                                                  msq, lsq, realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!grouped_load);
                  /* If we have versioned for aliasing or the loop doesn't
                     have any data dependencies that would preclude this,
                     then we are sure this is a loop invariant load and
                     thus we can insert it on the preheader edge.  */
                  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
                      && !nested_in_vect_loop
                      && hoist_defs_of_uses (stmt, loop))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "hoisting out of the vectorized "
                                           "loop: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }
                      tree tem = copy_ssa_name (scalar_dest);
                      gsi_insert_on_edge_immediate
                        (loop_preheader_edge (loop),
                         gimple_build_assign (tem,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                      set_vinfo_for_stmt (new_stmt,
                                          new_stmt_vec_info (new_stmt, vinfo));
                    }
                  else
                    {
                      gimple_stmt_iterator gsi2 = *gsi;
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap or for excess
             elements loaded for a permuted SLP load.  */
          if (group_gap_adj != 0)
            {
              bool ovf;
              tree bump
                = wide_int_to_tree (sizetype,
                                    wi::smul (TYPE_SIZE_UNIT (elem_type),
                                              group_gap_adj, &ovf));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}
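
/* Illustration only (plain scalar C, not GIMPLE, added for exposition): the
   "negative step" case handled above.  A loop that walks its input backwards
   is vectorized by biasing the vector pointer by -(nunits-1) elements and
   reversing each loaded vector with the mask from perm_mask_for_reverse, so
   the lanes appear in the order the scalar code consumes them.  The function
   name is made up for the example.  */

static void
example_negative_step (int *dst, const int *src, int n)
{
  /* Scalar form: src is read with a step of -1.  */
  for (int i = 0; i < n; i++)
    dst[i] = src[n - 1 - i];
}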
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vec_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
                               &dt, comp_vectype)
          || !*comp_vectype
          || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
        return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
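
/* Worked example (added for exposition) of what vectorizable_condition
   below produces for a simple scalar statement

     x = (a < b) ? c : d;

   With V4SI operands the generated statements are essentially

     vec_cmp  = vec_a < vec_b;                      // vec_cmp_type mask
     vec_dest = VEC_COND_EXPR <vec_cmp, vec_c, vec_d>;

   i.e. one vector comparison feeding one VEC_COND_EXPR per copy.  */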
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
                        gimple **vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt, dts[4];
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
        return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
          && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
               && reduc_def))
        return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "value used after loop.\n");
          return false;
        }
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;

              if (masked)
                ops.safe_push (cond_expr);
              else
                {
                  ops.safe_push (TREE_OPERAND (cond_expr, 0));
                  ops.safe_push (TREE_OPERAND (cond_expr, 1));
                }
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              if (!masked)
                vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple *gtemp;
              if (masked)
                {
                  vec_cond_lhs
                    = vect_get_vec_def_for_operand (cond_expr, stmt,
                                                    comp_vectype);
                  vect_is_simple_use (cond_expr, stmt_info->vinfo,
                                      &gtemp, &dts[0]);
                }
              else
                {
                  vec_cond_lhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
                                      loop_vinfo, &gtemp, &dts[0]);

                  vec_cond_rhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
                                      loop_vinfo, &gtemp, &dts[1]);
                }
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt);
                  vect_is_simple_use (then_clause, loop_vinfo,
                                      &gtemp, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt);
                  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs
            = vect_get_vec_def_for_stmt_copy (dts[0],
                                              vec_oprnds0.pop ());
          if (!masked)
            vec_cond_rhs
              = vect_get_vec_def_for_stmt_copy (dts[1],
                                                vec_oprnds1.pop ());

          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          if (!masked)
            vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          if (masked)
            vec_compare = vec_cond_lhs;
          else
            {
              vec_cond_rhs = vec_oprnds1[i];
              vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                    vec_cond_lhs, vec_cond_rhs);
            }
          new_temp = make_ssa_name (vec_dest);
          new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
                                          vec_compare, vec_then_clause,
                                          vec_else_clause);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* vectorizable_comparison.

   Check if STMT is comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, tree reduc_def,
                         slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned nunits;
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
                           &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
                           &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
        return false;
    }
  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
      return expand_vec_cmp_expr_p (vectype, mask_type);
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
            }
          else
            {
              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
            }
        }
      else
        {
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
                                                     vec_oprnds1.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
        {
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
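
/* Worked example (added for exposition): for a scalar statement

     b = x < y;

   whose result feeds a COND_EXPR or a masked operation,
   vectorizable_comparison above emits, per copy, a single statement of the
   form

     vmask = vec_x < vec_y;

   whose type is the vector boolean (mask) type chosen for the target, e.g.
   an integer vector of 0/-1 lanes or an AVX-512 style mask register.  */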
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple *pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands\n");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple *pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
              || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}
8169 /* Function vect_transform_stmt.
8171 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8174 vect_transform_stmt (gimple
*stmt
, gimple_stmt_iterator
*gsi
,
8175 bool *grouped_store
, slp_tree slp_node
,
8176 slp_instance slp_node_instance
)
8178 bool is_store
= false;
8179 gimple
*vec_stmt
= NULL
;
8180 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8183 gimple
*old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8185 switch (STMT_VINFO_TYPE (stmt_info
))
8187 case type_demotion_vec_info_type
:
8188 case type_promotion_vec_info_type
:
8189 case type_conversion_vec_info_type
:
8190 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
8194 case induc_vec_info_type
:
8195 gcc_assert (!slp_node
);
8196 done
	= vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and there vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt)
	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
	is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
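
/* Illustrative sketch (not part of the vectorizer; fenced off from the
   build): the grouped-store handling above corresponds to scalar code such
   as this hypothetical loop, in which the two interleaved stores form one
   group.  The vector stores are emitted only when the last store of the
   group is transformed; transforming an earlier member merely sets
   *GROUPED_STORE as seen above.  */
#if 0
static void
example_interleaved_stores (int *a, const int *b, const int *c, int n)
{
  for (int i = 0; i < n; i++)
    {
      a[2 * i] = b[i];		/* First store of the group: skipped.  */
      a[2 * i + 1] = c[i];	/* Last store: the whole chain is
				   vectorized here.  */
    }
}
#endif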

/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;

  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}

/* Create a hash table for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free hash table for stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;

  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}

/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}

/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;

  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));

  return vectype;
}
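
/* Illustrative sketch (hypothetical, fenced off from the build): if
   current_vector_size ends up being 16 bytes, the VECTOR_TYPEs returned
   above correspond to GNU C generic vector types such as the following,
   i.e. the number of lanes is the vector size divided by the element
   size.  */
#if 0
typedef int example_v4si __attribute__ ((vector_size (16)));	/* 4 x int */
typedef short example_v8hi __attribute__ ((vector_size (16)));	/* 8 x short */
typedef char example_v16qi __attribute__ ((vector_size (16)));	/* 16 x char */
#endif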

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
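
/* Illustrative sketch (hypothetical, fenced off from the build): the mask
   type built above describes the result of an elementwise comparison of two
   vectors.  The GNU C analogue is a comparison of generic vectors, which
   yields an integer vector holding 0 or -1 per lane; the truth vector type
   plays the same role inside the vectorizer.  */
#if 0
typedef int example_v4si __attribute__ ((vector_size (16)));

static example_v4si
example_compare (example_v4si a, example_v4si b)
{
  return a < b;		/* Each lane of the result is 0 or -1.  */
}
#endif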

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
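
/* Illustrative sketch (hypothetical, fenced off from the build): for a loop
   like the one below, vect_is_simple_use classifies the constant 5 as
   vect_constant_def, K (defined before the loop) as vect_external_def, and
   T (defined by the current iteration) as vect_internal_def.  Values carried
   across iterations instead get reduction/induction def types, and the
   caller computes or looks up vector types accordingly.  */
#if 0
static void
example_operand_classes (int *a, const int *b, int k, int n)
{
  for (int i = 0; i < n; i++)
    {
      int t = b[i] + k;		/* K: external def; T: internal def.  */
      a[i] = t * 5;		/* 5: constant def.  */
    }
}
#endif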

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
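
/* Illustrative sketch (hypothetical, fenced off from the build): for the
   conversion below, widening char to int with 128-bit vectors needs one
   intermediate step via short, i.e. *MULTI_STEP_CVT == 1 and INTERM_TYPES
   == { short }: each 16 x char vector is unpacked into two 8 x short
   vectors (lo/hi), and each of those into two 4 x int vectors.  The
   narrowing direction, handled by supportable_narrowing_operation below,
   packs in the opposite order.  */
#if 0
static void
example_two_step_widening (int *out, const signed char *in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[i];		/* char -> (short) -> int */
}
#endif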

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();