/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
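/* Illustrative sketch (not part of the original source; variable names are
   hypothetical).  Callers use record_stmt_cost in two ways: during analysis
   they pass a cost vector so the entry can be replayed against the target
   model later, or they pass NULL to charge the target model immediately:

     stmt_vector_for_cost body_costs;
     body_costs.create (0);
     unsigned estimate
       = record_stmt_cost (&body_costs, ncopies, vector_stmt,
                           stmt_info, 0, vect_body);   // saved for later
     ...
     record_stmt_cost (NULL, 1, vector_stmt,
                       stmt_info, 0, vect_body);       // charged directly  */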
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
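/* Illustrative sketch (not part of the original source): for scalar
   references a[i], a[i+1] replaced by one vectorized access through a
   pointer PTR, the reference built above is conceptually

     MEM_REF <TYPE> (PTR, 0)    with the alias type of DR_REF (a[i])

   so the new access keeps the alias information of the scalar references
   it replaces.  */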
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
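/* Illustrative sketch (not part of the original source):

     for (i = 0; i < n; i++)
       {
         s = a[i] + s;        <-- value of s is used after the loop
         b[i] = a[i] * 2;     <-- has a vdef (stores to memory)
       }
     ... = s;

   The store to b[i] is marked vect_used_in_scope because it alters memory,
   and the definition of s is marked live because it is used outside the
   loop, through the loop-closed exit phi.  */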
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     a limited number of copy-like forms
     (this should have been verified in analyze_data_refs).

     'var' in the store form corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     load form, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
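/* Illustrative sketch (not part of the original source): in

     x_3 = a[i_7];

   the SSA name i_7 is attached to the statement but is only used to index
   the array, so exist_non_indexing_operands_for_use_p (i_7, stmt) returns
   false and the definition of i_7 does not become relevant through this
   use.  */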
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}
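/* Illustrative sketch (not part of the original source) of the worklist
   propagation above, for a simple reduction loop:

     loop:
       sum_1 = PHI <sum_0, sum_2>
       x_3 = a[i_5];
       y_4 = x_3 * c_6;
       sum_2 = y_4 + sum_1;

   vect_stmt_relevant_p seeds the worklist with the definition of sum_2,
   whose value is live after the loop.  Because that statement defines a
   reduction, its operands' definitions (y_4, sum_1) are marked
   vect_used_by_reduction, which in turn marks the load of x_3, while i_5
   stays unmarked because it is only used for address computation.  */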
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
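/* Illustrative sketch (not part of the original source): for a store group
   a[4*i], a[4*i+1], a[4*i+2], a[4*i+3] with GROUP_SIZE == 4, the call on the
   first element of the group returns 4 and the calls on the other three
   elements return 1, so the group overhead is charged exactly once.  */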
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses a high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
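/* Illustrative arithmetic (not part of the original source): for a
   non-strided interleaved store with group_size = 4 and ncopies = 2, the
   permute count charged above is

     nstmts = ncopies * ceil_log2 (group_size) * group_size
            = 2 * 2 * 4 = 16 vec_perm operations,

   matching the log2-depth interleaving network used to build the stored
   vectors.  */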
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses an even and odd extract operations or shuffle operations
         for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);

  return vec_oprnd;
}
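/* Illustrative sketch (not part of the original source; the SSA name shown
   is hypothetical): when vectorizing "x = y + 3" with a V4SI vectype, the
   constant operand is materialized once, roughly as

     cst__1 = { 3, 3, 3, 3 };

   inserted in the loop preheader (GSI == NULL), and cst__1 is the vector
   def returned for use by the vectorized addition.  */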
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
          dump_printf (MSG_NOTE, "\n");
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d\n", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
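/* Illustrative sketch (not part of the original source) of the typical
   caller pattern for the two def-lookup routines above, inside an ncopies
   loop:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
         else
           vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
         ... build copy j of the vector stmt using vec_oprnd ...
       }  */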
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                         vectype_in);
}


static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);
1755 /* Function vectorizable_mask_load_store.
1757 Check if STMT performs a conditional load or store that can be vectorized.
1758 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1760 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1763 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1764 gimple
*vec_stmt
, slp_tree slp_node
)
1766 tree vec_dest
= NULL
;
1767 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1768 stmt_vec_info prev_stmt_info
;
1769 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1770 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1771 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1772 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1773 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1777 tree dataref_ptr
= NULL_TREE
;
1779 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1783 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1784 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1785 int gather_scale
= 1;
1786 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1791 enum vect_def_type dt
;
1793 if (slp_node
!= NULL
)
1796 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1797 gcc_assert (ncopies
>= 1);
1799 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1800 mask
= gimple_call_arg (stmt
, 2);
1801 if (TYPE_PRECISION (TREE_TYPE (mask
))
1802 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1805 /* FORNOW. This restriction should be relaxed. */
1806 if (nested_in_vect_loop
&& ncopies
> 1)
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1810 "multiple types in nested loop.");
1814 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1817 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1820 if (!STMT_VINFO_DATA_REF (stmt_info
))
1823 elem_type
= TREE_TYPE (vectype
);
1825 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1828 if (STMT_VINFO_STRIDED_P (stmt_info
))
1831 if (STMT_VINFO_GATHER_P (stmt_info
))
1835 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1836 &gather_off
, &gather_scale
);
1837 gcc_assert (gather_decl
);
1838 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1839 &def_stmt
, &def
, &gather_dt
,
1840 &gather_off_vectype
))
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1844 "gather index use not simple.");
1848 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1850 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1851 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1855 "masked gather with integer mask not supported.");
1859 else if (tree_int_cst_compare (nested_in_vect_loop
1860 ? STMT_VINFO_DR_STEP (stmt_info
)
1861 : DR_STEP (dr
), size_zero_node
) <= 0)
1863 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1864 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1867 if (TREE_CODE (mask
) != SSA_NAME
)
1870 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1871 &def_stmt
, &def
, &dt
))
1876 tree rhs
= gimple_call_arg (stmt
, 3);
1877 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1878 &def_stmt
, &def
, &dt
))
1882 if (!vec_stmt
) /* transformation not required. */
1884 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1886 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1889 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1895 if (STMT_VINFO_GATHER_P (stmt_info
))
1897 tree vec_oprnd0
= NULL_TREE
, op
;
1898 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1899 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1900 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1901 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1902 tree mask_perm_mask
= NULL_TREE
;
1903 edge pe
= loop_preheader_edge (loop
);
1906 enum { NARROW
, NONE
, WIDEN
} modifier
;
1907 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1909 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1910 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1911 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1912 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1913 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1914 scaletype
= TREE_VALUE (arglist
);
1915 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1916 && types_compatible_p (srctype
, masktype
));
1918 if (nunits
== gather_off_nunits
)
1920 else if (nunits
== gather_off_nunits
/ 2)
1922 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1925 for (i
= 0; i
< gather_off_nunits
; ++i
)
1926 sel
[i
] = i
| nunits
;
1928 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1930 else if (nunits
== gather_off_nunits
* 2)
1932 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1935 for (i
= 0; i
< nunits
; ++i
)
1936 sel
[i
] = i
< gather_off_nunits
1937 ? i
: i
+ nunits
- gather_off_nunits
;
1939 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1941 for (i
= 0; i
< nunits
; ++i
)
1942 sel
[i
] = i
| gather_off_nunits
;
1943 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1948 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1950 ptr
= fold_convert (ptrtype
, gather_base
);
1951 if (!is_gimple_min_invariant (ptr
))
1953 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1954 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1955 gcc_assert (!new_bb
);
1958 scale
= build_int_cst (scaletype
, gather_scale
);
1960 prev_stmt_info
= NULL
;
1961 for (j
= 0; j
< ncopies
; ++j
)
1963 if (modifier
== WIDEN
&& (j
& 1))
1964 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1965 perm_mask
, stmt
, gsi
);
1968 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1971 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1973 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1975 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1976 == TYPE_VECTOR_SUBPARTS (idxtype
));
1977 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1978 var
= make_ssa_name (var
);
1979 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1981 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1982 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1986 if (mask_perm_mask
&& (j
& 1))
1987 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1988 mask_perm_mask
, stmt
, gsi
);
1992 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1995 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
1996 &def_stmt
, &def
, &dt
);
1997 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2001 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2003 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2004 == TYPE_VECTOR_SUBPARTS (masktype
));
2005 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2007 var
= make_ssa_name (var
);
2008 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2010 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2011 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2017 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
2020 if (!useless_type_conversion_p (vectype
, rettype
))
2022 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2023 == TYPE_VECTOR_SUBPARTS (rettype
));
2024 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2025 op
= make_ssa_name (var
, new_stmt
);
2026 gimple_call_set_lhs (new_stmt
, op
);
2027 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2028 var
= make_ssa_name (vec_dest
);
2029 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2030 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2034 var
= make_ssa_name (vec_dest
, new_stmt
);
2035 gimple_call_set_lhs (new_stmt
, var
);
2038 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2040 if (modifier
== NARROW
)
2047 var
= permute_vec_elements (prev_res
, var
,
2048 perm_mask
, stmt
, gsi
);
2049 new_stmt
= SSA_NAME_DEF_STMT (var
);
2052 if (prev_stmt_info
== NULL
)
2053 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2055 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2056 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
     from the IL.  */
2061 tree lhs
= gimple_call_lhs (stmt
);
2062 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2063 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2064 set_vinfo_for_stmt (stmt
, NULL
);
2065 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2066 gsi_replace (gsi
, new_stmt
, true);
2071 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2072 prev_stmt_info
= NULL
;
2073 for (i
= 0; i
< ncopies
; i
++)
2075 unsigned align
, misalign
;
2079 tree rhs
= gimple_call_arg (stmt
, 3);
2080 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2081 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
  /* We should have caught mismatched types earlier.  */
2083 gcc_assert (useless_type_conversion_p (vectype
,
2084 TREE_TYPE (vec_rhs
)));
2085 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2086 NULL_TREE
, &dummy
, gsi
,
2087 &ptr_incr
, false, &inv_p
);
2088 gcc_assert (!inv_p
);
2092 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2094 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2095 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2097 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2098 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2099 TYPE_SIZE_UNIT (vectype
));
2102 align
= TYPE_ALIGN_UNIT (vectype
);
2103 if (aligned_access_p (dr
))
2105 else if (DR_MISALIGNMENT (dr
) == -1)
2107 align
= TYPE_ALIGN_UNIT (elem_type
);
2111 misalign
= DR_MISALIGNMENT (dr
);
2112 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2115 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2116 gimple_call_arg (stmt
, 1),
2118 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2120 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2122 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2123 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2128 tree vec_mask
= NULL_TREE
;
2129 prev_stmt_info
= NULL
;
2130 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2131 for (i
= 0; i
< ncopies
; i
++)
2133 unsigned align
, misalign
;
2137 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2138 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2139 NULL_TREE
, &dummy
, gsi
,
2140 &ptr_incr
, false, &inv_p
);
2141 gcc_assert (!inv_p
);
2145 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2147 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2148 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2149 TYPE_SIZE_UNIT (vectype
));
2152 align
= TYPE_ALIGN_UNIT (vectype
);
2153 if (aligned_access_p (dr
))
2155 else if (DR_MISALIGNMENT (dr
) == -1)
2157 align
= TYPE_ALIGN_UNIT (elem_type
);
2161 misalign
= DR_MISALIGNMENT (dr
);
2162 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2165 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2166 gimple_call_arg (stmt
, 1),
2168 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2169 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2171 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2173 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2174 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
     from the IL.  */
2182 tree lhs
= gimple_call_lhs (stmt
);
2183 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2184 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2185 set_vinfo_for_stmt (stmt
, NULL
);
2186 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2187 gsi_replace (gsi
, new_stmt
, true);
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
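/* Illustrative sketch (the builtin name below is hypothetical and
   target-dependent, not part of this file): when vectorizable_function
   returns a vector builtin for a math call, e.g.

	for (i = 0; i < n; i++)
	  a[i] = sqrt (b[i]);

   the transformation below emits, per copy j,

	vect_a_j = __builtin_vec_sqrt (vect_b_j);

   via gimple_build_call_vec (fndecl, vargs), where fndecl is whatever
   decl the target hook supplied.  */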
vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2209 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2210 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2211 tree vectype_out
, vectype_in
;
2214 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2215 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2216 tree fndecl
, new_temp
, def
, rhs_type
;
2218 enum vect_def_type dt
[3]
2219 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2220 gimple new_stmt
= NULL
;
2222 vec
<tree
> vargs
= vNULL
;
2223 enum { NARROW
, NONE
, WIDEN
} modifier
;
2227 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2230 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2233 /* Is GS a vectorizable call? */
2234 stmt
= dyn_cast
<gcall
*> (gs
);
2238 if (gimple_call_internal_p (stmt
)
2239 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2240 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2241 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2244 if (gimple_call_lhs (stmt
) == NULL_TREE
2245 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2248 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2250 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2252 /* Process function arguments. */
2253 rhs_type
= NULL_TREE
;
2254 vectype_in
= NULL_TREE
;
2255 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
2260 if (nargs
== 0 || nargs
> 3)
2263 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2264 if (gimple_call_internal_p (stmt
)
2265 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2268 rhs_type
= unsigned_type_node
;
2271 for (i
= 0; i
< nargs
; i
++)
2275 op
= gimple_call_arg (stmt
, i
);
2277 /* We can only handle calls with arguments of the same type. */
2279 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2281 if (dump_enabled_p ())
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2283 "argument types differ.\n");
2287 rhs_type
= TREE_TYPE (op
);
2289 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2290 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "use not simple.\n");
2299 vectype_in
= opvectype
;
2301 && opvectype
!= vectype_in
)
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2305 "argument vector types differ.\n");
  /* If all arguments are external or constant defs, use a vector type with
     the same size as the output vector type.  */
2312 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2314 gcc_assert (vectype_in
);
2317 if (dump_enabled_p ())
2319 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2320 "no vectype for scalar type ");
2321 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2322 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2329 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2330 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2331 if (nunits_in
== nunits_out
/ 2)
2333 else if (nunits_out
== nunits_in
)
2335 else if (nunits_out
== nunits_in
/ 2)
  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
2344 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2345 if (fndecl
== NULL_TREE
)
2347 if (gimple_call_internal_p (stmt
)
2348 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2351 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2352 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2353 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2354 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
  /* We can handle IFN_GOMP_SIMD_LANE by returning a
     { 0, 1, 2, ... vf - 1 } vector.  */
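/* For example (a sketch): with vf == 8 and V4SI vectors, nunits_out == 4
   and ncopies == 2, so copy j == 0 is given the constant { 0, 1, 2, 3 }
   and copy j == 1 the constant { 4, 5, 6, 7 }, matching the
   build_int_cst (unsigned_type_node, j * nunits_out + k) elements built
   further below.  */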
2358 gcc_assert (nargs
== 0);
2362 if (dump_enabled_p ())
2363 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2364 "function is not vectorizable.\n");
2369 gcc_assert (!gimple_vuse (stmt
));
2371 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2373 else if (modifier
== NARROW
)
2374 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2376 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
2380 gcc_assert (ncopies
>= 1);
2382 if (!vec_stmt
) /* transformation not required. */
2384 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2388 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2398 scalar_dest
= gimple_call_lhs (stmt
);
2399 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2401 prev_stmt_info
= NULL
;
2405 for (j
= 0; j
< ncopies
; ++j
)
2407 /* Build argument list for the vectorized call. */
2409 vargs
.create (nargs
);
2415 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2416 vec
<tree
> vec_oprnds0
;
2418 for (i
= 0; i
< nargs
; i
++)
2419 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2420 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2421 vec_oprnds0
= vec_defs
[0];
2423 /* Arguments are ready. Create the new vector stmt. */
2424 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2427 for (k
= 0; k
< nargs
; k
++)
2429 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2430 vargs
[k
] = vec_oprndsk
[i
];
2432 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2433 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2434 gimple_call_set_lhs (new_stmt
, new_temp
);
2435 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2436 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2439 for (i
= 0; i
< nargs
; i
++)
2441 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2442 vec_oprndsi
.release ();
2447 for (i
= 0; i
< nargs
; i
++)
2449 op
= gimple_call_arg (stmt
, i
);
2452 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2455 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2457 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2460 vargs
.quick_push (vec_oprnd0
);
2463 if (gimple_call_internal_p (stmt
)
2464 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2466 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2468 for (k
= 0; k
< nunits_out
; ++k
)
2469 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2470 tree cst
= build_vector (vectype_out
, v
);
2472 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2473 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2474 new_temp
= make_ssa_name (new_var
, init_stmt
);
2475 gimple_assign_set_lhs (init_stmt
, new_temp
);
2476 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2477 new_temp
= make_ssa_name (vec_dest
);
2478 new_stmt
= gimple_build_assign (new_temp
,
2479 gimple_assign_lhs (init_stmt
));
2483 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2484 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2485 gimple_call_set_lhs (new_stmt
, new_temp
);
2487 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2490 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2492 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2494 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2500 for (j
= 0; j
< ncopies
; ++j
)
2502 /* Build argument list for the vectorized call. */
2504 vargs
.create (nargs
* 2);
2510 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2511 vec
<tree
> vec_oprnds0
;
2513 for (i
= 0; i
< nargs
; i
++)
2514 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2515 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2516 vec_oprnds0
= vec_defs
[0];
2518 /* Arguments are ready. Create the new vector stmt. */
2519 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2523 for (k
= 0; k
< nargs
; k
++)
2525 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2526 vargs
.quick_push (vec_oprndsk
[i
]);
2527 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2529 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2530 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2531 gimple_call_set_lhs (new_stmt
, new_temp
);
2532 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2533 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2536 for (i
= 0; i
< nargs
; i
++)
2538 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2539 vec_oprndsi
.release ();
2544 for (i
= 0; i
< nargs
; i
++)
2546 op
= gimple_call_arg (stmt
, i
);
2550 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2552 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2556 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2558 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2560 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2563 vargs
.quick_push (vec_oprnd0
);
2564 vargs
.quick_push (vec_oprnd1
);
2567 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2568 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2569 gimple_call_set_lhs (new_stmt
, new_temp
);
2570 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2573 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2577 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2580 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2585 /* No current target implements this case. */
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
2599 type
= TREE_TYPE (scalar_dest
);
2600 if (is_pattern_stmt_p (stmt_info
))
2601 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2603 lhs
= gimple_call_lhs (stmt
);
2604 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2605 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2606 set_vinfo_for_stmt (stmt
, NULL
);
2607 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2608 gsi_replace (gsi
, new_stmt
, false);
2614 struct simd_call_arg_info
2618 enum vect_def_type dt
;
2619 HOST_WIDE_INT linear_step
;
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
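/* Illustrative sketch, with a hypothetical clone name (the real symbol
   comes from the vector ABI mangling of the simd clone): given

	#pragma omp declare simd
	int foo (int x);

	for (i = 0; i < n; i++)
	  a[i] = foo (b[i]);

   this function picks the least-bad entry of node->simd_clones (see the
   this_badness computation below) and replaces the scalar call with
   something like

	vect_a = foo.simdclone (vect_b);

   after massaging each argument to the clone's vector_type.  */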
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
			      gimple *vec_stmt, slp_tree slp_node)
2638 tree vec_oprnd0
= NULL_TREE
;
2639 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2641 unsigned int nunits
;
2642 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2643 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2644 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2645 tree fndecl
, new_temp
, def
;
2647 gimple new_stmt
= NULL
;
2649 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2650 vec
<tree
> vargs
= vNULL
;
2652 tree lhs
, rtype
, ratype
;
2653 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2655 /* Is STMT a vectorizable call? */
2656 if (!is_gimple_call (stmt
))
2659 fndecl
= gimple_call_fndecl (stmt
);
2660 if (fndecl
== NULL_TREE
)
2663 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2664 if (node
== NULL
|| node
->simd_clones
== NULL
)
2667 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2670 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2673 if (gimple_call_lhs (stmt
)
2674 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2677 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2679 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2681 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2685 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2688 /* Process function arguments. */
2689 nargs
= gimple_call_num_args (stmt
);
2691 /* Bail out if the function has zero arguments. */
2695 arginfo
.create (nargs
);
2697 for (i
= 0; i
< nargs
; i
++)
2699 simd_call_arg_info thisarginfo
;
2702 thisarginfo
.linear_step
= 0;
2703 thisarginfo
.align
= 0;
2704 thisarginfo
.op
= NULL_TREE
;
2706 op
= gimple_call_arg (stmt
, i
);
2707 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2708 &def_stmt
, &def
, &thisarginfo
.dt
,
2709 &thisarginfo
.vectype
)
2710 || thisarginfo
.dt
== vect_uninitialized_def
)
2712 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2714 "use not simple.\n");
2719 if (thisarginfo
.dt
== vect_constant_def
2720 || thisarginfo
.dt
== vect_external_def
)
2721 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2723 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
  /* For linear arguments, the analyze phase should have saved
     the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
2727 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2728 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2730 gcc_assert (vec_stmt
);
2731 thisarginfo
.linear_step
2732 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2734 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2735 /* If loop has been peeled for alignment, we need to adjust it. */
2736 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2737 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2740 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2741 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2742 tree opt
= TREE_TYPE (thisarginfo
.op
);
2743 bias
= fold_convert (TREE_TYPE (step
), bias
);
2744 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2746 = fold_build2 (POINTER_TYPE_P (opt
)
2747 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2748 thisarginfo
.op
, bias
);
2752 && thisarginfo
.dt
!= vect_constant_def
2753 && thisarginfo
.dt
!= vect_external_def
2755 && TREE_CODE (op
) == SSA_NAME
2756 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2758 && tree_fits_shwi_p (iv
.step
))
2760 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2761 thisarginfo
.op
= iv
.base
;
2763 else if ((thisarginfo
.dt
== vect_constant_def
2764 || thisarginfo
.dt
== vect_external_def
)
2765 && POINTER_TYPE_P (TREE_TYPE (op
)))
2766 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2768 arginfo
.quick_push (thisarginfo
);
2771 unsigned int badness
= 0;
2772 struct cgraph_node
*bestn
= NULL
;
2773 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2774 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2776 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2777 n
= n
->simdclone
->next_clone
)
2779 unsigned int this_badness
= 0;
2780 if (n
->simdclone
->simdlen
2781 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2782 || n
->simdclone
->nargs
!= nargs
)
2784 if (n
->simdclone
->simdlen
2785 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2786 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2787 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2788 if (n
->simdclone
->inbranch
)
2789 this_badness
+= 2048;
2790 int target_badness
= targetm
.simd_clone
.usable (n
);
2791 if (target_badness
< 0)
2793 this_badness
+= target_badness
* 512;
2794 /* FORNOW: Have to add code to add the mask argument. */
2795 if (n
->simdclone
->inbranch
)
2797 for (i
= 0; i
< nargs
; i
++)
2799 switch (n
->simdclone
->args
[i
].arg_type
)
2801 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2802 if (!useless_type_conversion_p
2803 (n
->simdclone
->args
[i
].orig_type
,
2804 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2806 else if (arginfo
[i
].dt
== vect_constant_def
2807 || arginfo
[i
].dt
== vect_external_def
2808 || arginfo
[i
].linear_step
)
2811 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2812 if (arginfo
[i
].dt
!= vect_constant_def
2813 && arginfo
[i
].dt
!= vect_external_def
)
2816 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2817 if (arginfo
[i
].dt
== vect_constant_def
2818 || arginfo
[i
].dt
== vect_external_def
2819 || (arginfo
[i
].linear_step
2820 != n
->simdclone
->args
[i
].linear_step
))
2823 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2827 case SIMD_CLONE_ARG_TYPE_MASK
:
2830 if (i
== (size_t) -1)
2832 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2837 if (arginfo
[i
].align
)
2838 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2839 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2841 if (i
== (size_t) -1)
2843 if (bestn
== NULL
|| this_badness
< badness
)
2846 badness
= this_badness
;
2856 for (i
= 0; i
< nargs
; i
++)
2857 if ((arginfo
[i
].dt
== vect_constant_def
2858 || arginfo
[i
].dt
== vect_external_def
)
2859 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2862 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2864 if (arginfo
[i
].vectype
== NULL
2865 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2866 > bestn
->simdclone
->simdlen
))
2873 fndecl
= bestn
->decl
;
2874 nunits
= bestn
->simdclone
->simdlen
;
2875 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
  /* If the function isn't const, only allow it in simd loops where the
     user has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
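/* For example (not from the original sources): a clone with simdlen == 8
   whose call still has a virtual use (i.e. the function may touch memory)
   is only used when the loop was marked with something like
   "#pragma omp simd safelen(8)" or larger, so that loop->safelen >= nunits;
   otherwise this routine refuses the call.  */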
2880 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2881 && gimple_vuse (stmt
))
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
2889 gcc_assert (ncopies
>= 1);
2891 if (!vec_stmt
) /* transformation not required. */
2893 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2894 for (i
= 0; i
< nargs
; i
++)
2895 if (bestn
->simdclone
->args
[i
].arg_type
2896 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2898 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2900 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2901 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2902 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2903 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2906 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2907 if (dump_enabled_p ())
2908 dump_printf_loc (MSG_NOTE
, vect_location
,
2909 "=== vectorizable_simd_clone_call ===\n");
2910 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2917 if (dump_enabled_p ())
2918 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2921 scalar_dest
= gimple_call_lhs (stmt
);
2922 vec_dest
= NULL_TREE
;
2927 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2928 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2929 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2932 rtype
= TREE_TYPE (ratype
);
2936 prev_stmt_info
= NULL
;
2937 for (j
= 0; j
< ncopies
; ++j
)
2939 /* Build argument list for the vectorized call. */
2941 vargs
.create (nargs
);
2945 for (i
= 0; i
< nargs
; i
++)
2947 unsigned int k
, l
, m
, o
;
2949 op
= gimple_call_arg (stmt
, i
);
2950 switch (bestn
->simdclone
->args
[i
].arg_type
)
2952 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2953 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2954 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2955 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2957 if (TYPE_VECTOR_SUBPARTS (atype
)
2958 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2960 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2961 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2962 / TYPE_VECTOR_SUBPARTS (atype
));
2963 gcc_assert ((k
& (k
- 1)) == 0);
2966 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2969 vec_oprnd0
= arginfo
[i
].op
;
2970 if ((m
& (k
- 1)) == 0)
2972 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2975 arginfo
[i
].op
= vec_oprnd0
;
2977 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2979 bitsize_int ((m
& (k
- 1)) * prec
));
2981 = gimple_build_assign (make_ssa_name (atype
),
2983 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2984 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2988 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2989 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2990 gcc_assert ((k
& (k
- 1)) == 0);
2991 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2993 vec_alloc (ctor_elts
, k
);
2996 for (l
= 0; l
< k
; l
++)
2998 if (m
== 0 && l
== 0)
3000 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3003 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3005 arginfo
[i
].op
= vec_oprnd0
;
3008 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3012 vargs
.safe_push (vec_oprnd0
);
3015 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3017 = gimple_build_assign (make_ssa_name (atype
),
3019 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3020 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3025 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3026 vargs
.safe_push (op
);
3028 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3033 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3038 edge pe
= loop_preheader_edge (loop
);
3039 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3040 gcc_assert (!new_bb
);
3042 tree phi_res
= copy_ssa_name (op
);
3043 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3044 set_vinfo_for_stmt (new_phi
,
3045 new_stmt_vec_info (new_phi
, loop_vinfo
,
3047 add_phi_arg (new_phi
, arginfo
[i
].op
,
3048 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3050 = POINTER_TYPE_P (TREE_TYPE (op
))
3051 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3052 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3053 ? sizetype
: TREE_TYPE (op
);
3055 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3057 tree tcst
= wide_int_to_tree (type
, cst
);
3058 tree phi_arg
= copy_ssa_name (op
);
3060 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3061 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3062 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3063 set_vinfo_for_stmt (new_stmt
,
3064 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3066 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3068 arginfo
[i
].op
= phi_res
;
3069 vargs
.safe_push (phi_res
);
3074 = POINTER_TYPE_P (TREE_TYPE (op
))
3075 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3076 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3077 ? sizetype
: TREE_TYPE (op
);
3079 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3081 tree tcst
= wide_int_to_tree (type
, cst
);
3082 new_temp
= make_ssa_name (TREE_TYPE (op
));
3083 new_stmt
= gimple_build_assign (new_temp
, code
,
3084 arginfo
[i
].op
, tcst
);
3085 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3086 vargs
.safe_push (new_temp
);
3089 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3095 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3098 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3100 new_temp
= create_tmp_var (ratype
);
3101 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3102 == TYPE_VECTOR_SUBPARTS (rtype
))
3103 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3105 new_temp
= make_ssa_name (rtype
, new_stmt
);
3106 gimple_call_set_lhs (new_stmt
, new_temp
);
3108 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3112 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3115 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3116 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3117 gcc_assert ((k
& (k
- 1)) == 0);
3118 for (l
= 0; l
< k
; l
++)
3123 t
= build_fold_addr_expr (new_temp
);
3124 t
= build2 (MEM_REF
, vectype
, t
,
3125 build_int_cst (TREE_TYPE (t
),
3126 l
* prec
/ BITS_PER_UNIT
));
3129 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3130 size_int (prec
), bitsize_int (l
* prec
));
3132 = gimple_build_assign (make_ssa_name (vectype
), t
);
3133 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3134 if (j
== 0 && l
== 0)
3135 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3137 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3139 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3144 tree clobber
= build_constructor (ratype
, NULL
);
3145 TREE_THIS_VOLATILE (clobber
) = 1;
3146 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3147 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3151 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3153 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3154 / TYPE_VECTOR_SUBPARTS (rtype
));
3155 gcc_assert ((k
& (k
- 1)) == 0);
3156 if ((j
& (k
- 1)) == 0)
3157 vec_alloc (ret_ctor_elts
, k
);
3160 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3161 for (m
= 0; m
< o
; m
++)
3163 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3164 size_int (m
), NULL_TREE
, NULL_TREE
);
3166 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3167 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3168 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3169 gimple_assign_lhs (new_stmt
));
3171 tree clobber
= build_constructor (ratype
, NULL
);
3172 TREE_THIS_VOLATILE (clobber
) = 1;
3173 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3174 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3177 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3178 if ((j
& (k
- 1)) != k
- 1)
3180 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3182 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3183 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3185 if ((unsigned) j
== k
- 1)
3186 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3188 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3190 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3195 tree t
= build_fold_addr_expr (new_temp
);
3196 t
= build2 (MEM_REF
, vectype
, t
,
3197 build_int_cst (TREE_TYPE (t
), 0));
3199 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3200 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3201 tree clobber
= build_constructor (ratype
, NULL
);
3202 TREE_THIS_VOLATILE (clobber
) = 1;
3203 vect_finish_stmt_generation (stmt
,
3204 gimple_build_assign (new_temp
,
3210 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3212 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3214 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */
3229 type
= TREE_TYPE (scalar_dest
);
3230 if (is_pattern_stmt_p (stmt_info
))
3231 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3233 lhs
= gimple_call_lhs (stmt
);
3234 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3237 new_stmt
= gimple_build_nop ();
3238 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3239 set_vinfo_for_stmt (stmt
, NULL
);
3240 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3241 gsi_replace (gsi
, new_stmt
, true);
3242 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */
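/* Illustrative sketch (the tree codes below are the ones typically
   produced by supportable_widening_operation, used here as an assumed
   example): a widening multiply of V8HI operands into V4SI results is
   emitted as two halves,

	vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_b, vect_c>;
	vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_b, vect_c>;

   and each call of this helper generates one such half, either as a
   gimple assignment or, when CODE is CALL_EXPR, as a call to the target
   builtin DECL.  */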
vect_gen_widened_results_half (enum tree_code code,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
3267 /* Generate half of the widened result: */
3268 if (code
== CALL_EXPR
)
3270 /* Target specific support */
3271 if (op_type
== binary_op
)
3272 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3274 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3275 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3276 gimple_call_set_lhs (new_stmt
, new_temp
);
3280 /* Generic support */
3281 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3282 if (op_type
!= binary_op
)
3284 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3285 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3286 gimple_assign_set_lhs (new_stmt
, new_temp
);
3288 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
3310 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3311 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3313 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3315 vec_oprnds
->quick_push (vec_oprnd
);
3317 /* Get second vector operand. */
3318 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3319 vec_oprnds
->quick_push (vec_oprnd
);
  /* For conversion in multiple steps, continue to get operands
     recursively.  */
3326 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */
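/* For example (a sketch): demoting two V4SI operands into one V8HI result
   typically uses

	vect_out = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   which is the per-pair statement built below; a multi-step demotion
   repeats this pairwise packing once per level of the recursion.  */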
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt, gimple stmt,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
3343 tree vop0
, vop1
, new_tmp
, vec_dest
;
3345 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3347 vec_dest
= vec_dsts
.pop ();
3349 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3351 /* Create demotion operation. */
3352 vop0
= (*vec_oprnds
)[i
];
3353 vop1
= (*vec_oprnds
)[i
+ 1];
3354 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3355 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3356 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3357 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3360 /* Store the resulting vector for next recursive call. */
3361 (*vec_oprnds
)[i
/2] = new_tmp
;
  /* This is the last step of the conversion sequence.  Store the
     vectors in SLP_NODE or in vector info of the scalar statement
     (or in STMT_VINFO_RELATED_STMT chain).  */
3368 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3371 if (!*prev_stmt_info
)
3372 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3374 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3376 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */

  /* At each level of recursion we have half of the operands we had at the
     previous level.  */
  vec_oprnds->truncate ((i+1)/2);
3390 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3391 stmt
, vec_dsts
, gsi
, slp_node
,
3392 VEC_PACK_TRUNC_EXPR
,
3396 vec_dsts
.quick_push (vec_dest
);
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */
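/* For example (a sketch): promoting one V8HI operand into two V4SI results
   typically uses the hi/lo pair

	vect_lo = VEC_UNPACK_LO_EXPR <vop0>;
	vect_hi = VEC_UNPACK_HI_EXPR <vop0>;

   produced by the two vect_gen_widened_results_half calls below; both
   results are pushed back into VEC_OPRNDS0 so a further step can consume
   them.  */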
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					gimple stmt, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
3414 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3415 gimple new_stmt1
, new_stmt2
;
3416 vec
<tree
> vec_tmp
= vNULL
;
3418 vec_tmp
.create (vec_oprnds0
->length () * 2);
3419 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3421 if (op_type
== binary_op
)
3422 vop1
= (*vec_oprnds1
)[i
];
3426 /* Generate the two halves of promotion operation. */
3427 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3428 op_type
, vec_dest
, gsi
, stmt
);
3429 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3430 op_type
, vec_dest
, gsi
, stmt
);
3431 if (is_gimple_call (new_stmt1
))
3433 new_tmp1
= gimple_call_lhs (new_stmt1
);
3434 new_tmp2
= gimple_call_lhs (new_stmt2
);
3438 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3439 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3442 /* Store the results for the next step. */
3443 vec_tmp
.quick_push (new_tmp1
);
3444 vec_tmp
.quick_push (new_tmp2
);
3447 vec_oprnds0
->release ();
3448 *vec_oprnds0
= vec_tmp
;
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
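/* Illustrative sketch (not from the original sources): for

	for (i = 0; i < n; i++)
	  d[i] = (double) s[i];

   with s a short and d a double, nunits_in > nunits_out, so this is a
   WIDEN case; if the target has no direct short -> double conversion,
   the multi-step code below first widens to an intermediate integer type
   (cvt_type) and only then applies FLOAT_EXPR to reach double.  */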
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
3463 tree op0
, op1
= NULL_TREE
;
3464 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3465 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3466 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3467 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3468 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3469 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3473 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3474 gimple new_stmt
= NULL
;
3475 stmt_vec_info prev_stmt_info
;
3478 tree vectype_out
, vectype_in
;
3480 tree lhs_type
, rhs_type
;
3481 enum { NARROW
, NONE
, WIDEN
} modifier
;
3482 vec
<tree
> vec_oprnds0
= vNULL
;
3483 vec
<tree
> vec_oprnds1
= vNULL
;
3485 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3486 int multi_step_cvt
= 0;
3487 vec
<tree
> vec_dsts
= vNULL
;
3488 vec
<tree
> interm_types
= vNULL
;
3489 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3491 machine_mode rhs_mode
;
3492 unsigned short fltsz
;
3494 /* Is STMT a vectorizable conversion? */
3496 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3499 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3502 if (!is_gimple_assign (stmt
))
3505 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3508 code
= gimple_assign_rhs_code (stmt
);
3509 if (!CONVERT_EXPR_CODE_P (code
)
3510 && code
!= FIX_TRUNC_EXPR
3511 && code
!= FLOAT_EXPR
3512 && code
!= WIDEN_MULT_EXPR
3513 && code
!= WIDEN_LSHIFT_EXPR
)
3516 op_type
= TREE_CODE_LENGTH (code
);
3518 /* Check types of lhs and rhs. */
3519 scalar_dest
= gimple_assign_lhs (stmt
);
3520 lhs_type
= TREE_TYPE (scalar_dest
);
3521 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3523 op0
= gimple_assign_rhs1 (stmt
);
3524 rhs_type
= TREE_TYPE (op0
);
3526 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3527 && !((INTEGRAL_TYPE_P (lhs_type
)
3528 && INTEGRAL_TYPE_P (rhs_type
))
3529 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3530 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3533 if ((INTEGRAL_TYPE_P (lhs_type
)
3534 && (TYPE_PRECISION (lhs_type
)
3535 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3536 || (INTEGRAL_TYPE_P (rhs_type
)
3537 && (TYPE_PRECISION (rhs_type
)
3538 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3540 if (dump_enabled_p ())
3541 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3542 "type conversion to/from bit-precision unsupported."
3547 /* Check the operands of the operation. */
3548 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3549 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3553 "use not simple.\n");
3556 if (op_type
== binary_op
)
3560 op1
= gimple_assign_rhs2 (stmt
);
3561 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3562 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3564 if (CONSTANT_CLASS_P (op0
))
3565 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3566 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3568 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3573 if (dump_enabled_p ())
3574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3575 "use not simple.\n");
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
3583 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3585 gcc_assert (vectype_in
);
3588 if (dump_enabled_p ())
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3591 "no vectype for scalar type ");
3592 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3593 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3599 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3600 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3601 if (nunits_in
< nunits_out
)
3603 else if (nunits_out
== nunits_in
)
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
3611 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3613 else if (modifier
== NARROW
)
3614 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3616 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
3620 gcc_assert (ncopies
>= 1);
3622 /* Supportable by target? */
3626 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3628 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3633 if (dump_enabled_p ())
3634 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3635 "conversion not supported by target.\n");
3639 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3640 &code1
, &code2
, &multi_step_cvt
,
  /* Binary widening operation can only be supported directly by the
     architecture.  */
  gcc_assert (!(multi_step_cvt && op_type == binary_op));
3649 if (code
!= FLOAT_EXPR
3650 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3651 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3654 rhs_mode
= TYPE_MODE (rhs_type
);
3655 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3656 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3657 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3658 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3661 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3662 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3663 if (cvt_type
== NULL_TREE
)
3666 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3668 if (!supportable_convert_operation (code
, vectype_out
,
3669 cvt_type
, &decl1
, &codecvt1
))
3672 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3673 cvt_type
, &codecvt1
,
3674 &codecvt2
, &multi_step_cvt
,
3678 gcc_assert (multi_step_cvt
== 0);
3680 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3681 vectype_in
, &code1
, &code2
,
3682 &multi_step_cvt
, &interm_types
))
3686 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3689 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3690 codecvt2
= ERROR_MARK
;
3694 interm_types
.safe_push (cvt_type
);
3695 cvt_type
= NULL_TREE
;
3700 gcc_assert (op_type
== unary_op
);
3701 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3702 &code1
, &multi_step_cvt
,
3706 if (code
!= FIX_TRUNC_EXPR
3707 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3708 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3711 rhs_mode
= TYPE_MODE (rhs_type
);
3713 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3714 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3715 if (cvt_type
== NULL_TREE
)
3717 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3720 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3721 &code1
, &multi_step_cvt
,
3730 if (!vec_stmt
) /* transformation not required. */
3732 if (dump_enabled_p ())
3733 dump_printf_loc (MSG_NOTE
, vect_location
,
3734 "=== vectorizable_conversion ===\n");
3735 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3737 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3738 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3740 else if (modifier
== NARROW
)
3742 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3743 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3747 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3748 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3750 interm_types
.release ();
3755 if (dump_enabled_p ())
3756 dump_printf_loc (MSG_NOTE
, vect_location
,
3757 "transform conversion. ncopies = %d.\n", ncopies
);
3759 if (op_type
== binary_op
)
3761 if (CONSTANT_CLASS_P (op0
))
3762 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3763 else if (CONSTANT_CLASS_P (op1
))
3764 op1
= fold_convert (TREE_TYPE (op0
), op1
);
  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
3772 vec_dsts
.create (multi_step_cvt
+ 1);
3773 vec_dest
= vect_create_destination_var (scalar_dest
,
3774 (cvt_type
&& modifier
== WIDEN
)
3775 ? cvt_type
: vectype_out
);
3776 vec_dsts
.quick_push (vec_dest
);
3780 for (i
= interm_types
.length () - 1;
3781 interm_types
.iterate (i
, &intermediate_type
); i
--)
3783 vec_dest
= vect_create_destination_var (scalar_dest
,
3785 vec_dsts
.quick_push (vec_dest
);
3790 vec_dest
= vect_create_destination_var (scalar_dest
,
3792 ? vectype_out
: cvt_type
);
3796 if (modifier
== WIDEN
)
3798 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3799 if (op_type
== binary_op
)
3800 vec_oprnds1
.create (1);
3802 else if (modifier
== NARROW
)
3803 vec_oprnds0
.create (
3804 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3806 else if (code
== WIDEN_LSHIFT_EXPR
)
3807 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3810 prev_stmt_info
= NULL
;
3814 for (j
= 0; j
< ncopies
; j
++)
3817 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3820 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3822 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3824 /* Arguments are ready, create the new vector stmt. */
3825 if (code1
== CALL_EXPR
)
3827 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3828 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3829 gimple_call_set_lhs (new_stmt
, new_temp
);
3833 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3834 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3835 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3836 gimple_assign_set_lhs (new_stmt
, new_temp
);
3839 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3841 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3844 if (!prev_stmt_info
)
3845 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3847 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3848 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to
     generate more than one vector stmt, i.e., we need to "unroll"
     the vector stmt by a factor VF/nunits.  */
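/* For example (a sketch): with a vectorization factor of 8 and vectors
   holding 4 elements, ncopies == 2 and the loop below runs twice; the
   second iteration does not re-read the scalar operands but takes its
   inputs from vect_get_vec_def_for_stmt_copy of the first copy's defs.  */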
3859 for (j
= 0; j
< ncopies
; j
++)
3866 if (code
== WIDEN_LSHIFT_EXPR
)
  /* Store vec_oprnd1 for every vector stmt to be created
     for SLP_NODE.  We check during the analysis that all
     the shift arguments are the same.  */
3874 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3875 vec_oprnds1
.quick_push (vec_oprnd1
);
3877 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3881 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3882 &vec_oprnds1
, slp_node
, -1);
3886 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3887 vec_oprnds0
.quick_push (vec_oprnd0
);
3888 if (op_type
== binary_op
)
3890 if (code
== WIDEN_LSHIFT_EXPR
)
3893 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3895 vec_oprnds1
.quick_push (vec_oprnd1
);
3901 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3902 vec_oprnds0
.truncate (0);
3903 vec_oprnds0
.quick_push (vec_oprnd0
);
3904 if (op_type
== binary_op
)
3906 if (code
== WIDEN_LSHIFT_EXPR
)
3909 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3911 vec_oprnds1
.truncate (0);
3912 vec_oprnds1
.quick_push (vec_oprnd1
);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 for (i
= multi_step_cvt
; i
>= 0; i
--)
3919 tree this_dest
= vec_dsts
[i
];
3920 enum tree_code c1
= code1
, c2
= code2
;
3921 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3926 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3928 stmt
, this_dest
, gsi
,
3929 c1
, c2
, decl1
, decl2
,
3933 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3937 if (codecvt1
== CALL_EXPR
)
3939 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3940 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3941 gimple_call_set_lhs (new_stmt
, new_temp
);
3945 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3946 new_temp
= make_ssa_name (vec_dest
);
3947 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3951 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3954 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3957 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3960 if (!prev_stmt_info
)
3961 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3963 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3964 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3969 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to
     generate more than one vector stmt, i.e., we need to "unroll"
     the vector stmt by a factor VF/nunits.  */
3977 for (j
= 0; j
< ncopies
; j
++)
3981 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3985 vec_oprnds0
.truncate (0);
3986 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3987 vect_pow2 (multi_step_cvt
) - 1);
3990 /* Arguments are ready. Create the new vector stmts. */
3992 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3994 if (codecvt1
== CALL_EXPR
)
3996 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3997 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3998 gimple_call_set_lhs (new_stmt
, new_temp
);
4002 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4003 new_temp
= make_ssa_name (vec_dest
);
4004 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4008 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4009 vec_oprnds0
[i
] = new_temp
;
4012 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4013 stmt
, vec_dsts
, gsi
,
4018 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4022 vec_oprnds0
.release ();
4023 vec_oprnds1
.release ();
4024 vec_dsts
.release ();
4025 interm_types
.release ();
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
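/* Illustrative sketch (not from the original sources): a straight copy

	for (i = 0; i < n; i++)
	  a[i] = b[i];

   becomes ncopies vector copies of the form vect_a = vect_b; when the
   scalar statement was a size-preserving conversion, the rhs is wrapped
   in a VIEW_CONVERT_EXPR first, as done in the transformation below.  */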
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
			 gimple *vec_stmt, slp_tree slp_node)
4045 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4046 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4047 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4051 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4052 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4055 vec
<tree
> vec_oprnds
= vNULL
;
4057 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4058 gimple new_stmt
= NULL
;
4059 stmt_vec_info prev_stmt_info
= NULL
;
4060 enum tree_code code
;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4066 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4069 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4071 gcc_assert (ncopies
>= 1);
4073 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4076 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4079 /* Is vectorizable assignment? */
4080 if (!is_gimple_assign (stmt
))
4083 scalar_dest
= gimple_assign_lhs (stmt
);
4084 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4087 code
= gimple_assign_rhs_code (stmt
);
4088 if (gimple_assign_single_p (stmt
)
4089 || code
== PAREN_EXPR
4090 || CONVERT_EXPR_CODE_P (code
))
4091 op
= gimple_assign_rhs1 (stmt
);
4095 if (code
== VIEW_CONVERT_EXPR
)
4096 op
= TREE_OPERAND (op
, 0);
4098 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4099 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4103 "use not simple.\n");
  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
4109 if ((CONVERT_EXPR_CODE_P (code
)
4110 || code
== VIEW_CONVERT_EXPR
)
4112 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4113 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4114 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4117 /* We do not handle bit-precision changes. */
4118 if ((CONVERT_EXPR_CODE_P (code
)
4119 || code
== VIEW_CONVERT_EXPR
)
4120 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4121 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4122 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4123 || ((TYPE_PRECISION (TREE_TYPE (op
))
4124 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4125 /* But a conversion that does not change the bit-pattern is ok. */
4126 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4127 > TYPE_PRECISION (TREE_TYPE (op
)))
4128 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4130 if (dump_enabled_p ())
4131 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4132 "type conversion to/from bit-precision "
4137 if (!vec_stmt
) /* transformation not required. */
4139 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4140 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_NOTE
, vect_location
,
4142 "=== vectorizable_assignment ===\n");
4143 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4148 if (dump_enabled_p ())
4149 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4152 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4155 for (j
= 0; j
< ncopies
; j
++)
4159 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4161 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
  /* Arguments are ready.  Create the new vector stmt.  */
4164 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4166 if (CONVERT_EXPR_CODE_P (code
)
4167 || code
== VIEW_CONVERT_EXPR
)
4168 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4169 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4170 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4171 gimple_assign_set_lhs (new_stmt
, new_temp
);
4172 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4174 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4181 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4183 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4185 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4188 vec_oprnds
.release ();
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
4200 machine_mode vec_mode
;
4205 vectype
= get_vectype_for_scalar_type (scalar_type
);
4209 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4211 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4213 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4215 || (optab_handler (optab
, TYPE_MODE (vectype
))
4216 == CODE_FOR_nothing
))
4220 vec_mode
= TYPE_MODE (vectype
);
4221 icode
= (int) optab_handler (optab
, vec_mode
);
4222 if (icode
== CODE_FOR_nothing
)
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
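/* Illustrative source-level examples of the two supported shift forms
   (assumed, not part of the original comment):

     for (i = 0; i < n; i++)
       a[i] = b[i] << s;      <-- loop-invariant shift amount: the
                                  vector/scalar shift optab can be used

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];   <-- per-element shift amount: the
                                  vector/vector shift optab is required  */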
4237 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4238 gimple
*vec_stmt
, slp_tree slp_node
)
4242 tree op0
, op1
= NULL
;
4243 tree vec_oprnd1
= NULL_TREE
;
4244 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4246 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4247 enum tree_code code
;
4248 machine_mode vec_mode
;
4252 machine_mode optab_op2_mode
;
4255 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4256 gimple new_stmt
= NULL
;
4257 stmt_vec_info prev_stmt_info
;
4264 vec
<tree
> vec_oprnds0
= vNULL
;
4265 vec
<tree
> vec_oprnds1
= vNULL
;
4268 bool scalar_shift_arg
= true;
4269 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4272 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4275 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4278 /* Is STMT a vectorizable binary/unary operation? */
4279 if (!is_gimple_assign (stmt
))
4282 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4285 code
= gimple_assign_rhs_code (stmt
);
4287 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4288 || code
== RROTATE_EXPR
))
4291 scalar_dest
= gimple_assign_lhs (stmt
);
4292 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4293 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4294 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4296 if (dump_enabled_p ())
4297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4298 "bit-precision shifts not supported.\n");
4302 op0
= gimple_assign_rhs1 (stmt
);
4303 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4304 &def_stmt
, &def
, &dt
[0], &vectype
))
4306 if (dump_enabled_p ())
4307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4308 "use not simple.\n");
4311 /* If op0 is an external or constant def use a vector type with
4312 the same size as the output vector type. */
4314 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4316 gcc_assert (vectype
);
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4321 "no vectype for scalar type\n");
4325 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4326 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4327 if (nunits_out
!= nunits_in
)
4330 op1
= gimple_assign_rhs2 (stmt
);
4331 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4332 &def
, &dt
[1], &op1_vectype
))
4334 if (dump_enabled_p ())
4335 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4336 "use not simple.\n");
4341 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353 gcc_assert (ncopies
>= 1);
4355 /* Determine whether the shift amount is a vector, or scalar. If the
4356 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4358 if (dt
[1] == vect_internal_def
&& !slp_node
)
4359 scalar_shift_arg
= false;
4360 else if (dt
[1] == vect_constant_def
4361 || dt
[1] == vect_external_def
4362 || dt
[1] == vect_internal_def
)
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
4369 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4372 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4373 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4374 scalar_shift_arg
= false;
4379 if (dump_enabled_p ())
4380 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4381 "operand mode requires invariant argument.\n");
4385 /* Vector shifted by vector. */
4386 if (!scalar_shift_arg
)
4388 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4389 if (dump_enabled_p ())
4390 dump_printf_loc (MSG_NOTE
, vect_location
,
4391 "vector/vector shift/rotate found.\n");
4394 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4395 if (op1_vectype
== NULL_TREE
4396 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4400 "unusable type for last operand in"
4401 " vector/vector shift/rotate.\n");
4405 /* See if the machine has a vector shifted by scalar insn and if not
4406 then see if it has a vector shifted by vector insn. */
4409 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4411 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4413 if (dump_enabled_p ())
4414 dump_printf_loc (MSG_NOTE
, vect_location
,
4415 "vector/scalar shift/rotate found.\n");
4419 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4421 && (optab_handler (optab
, TYPE_MODE (vectype
))
4422 != CODE_FOR_nothing
))
4424 scalar_shift_arg
= false;
4426 if (dump_enabled_p ())
4427 dump_printf_loc (MSG_NOTE
, vect_location
,
4428 "vector/vector shift/rotate found.\n");
	  /* Unlike the other binary operators, shifts/rotates have
	     the rhs being int, instead of the same type as the lhs,
	     so make sure the scalar is the right type if we are
	     dealing with vectors of long long/long/short/char.  */
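	  /* For example (illustrative, values assumed): when vectorizing
	     "long long a[i] <<= k" with an int-typed k, the invariant k is
	     converted to long long here so that the splatted {k, k, ...}
	     vector has the same element mode as the shifted operand in the
	     vector/vector shift.  */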
4434 if (dt
[1] == vect_constant_def
)
4435 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4436 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4440 && TYPE_MODE (TREE_TYPE (vectype
))
4441 != TYPE_MODE (TREE_TYPE (op1
)))
4443 if (dump_enabled_p ())
4444 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4445 "unusable type for last operand in"
4446 " vector/vector shift/rotate.\n");
4449 if (vec_stmt
&& !slp_node
)
4451 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4452 op1
= vect_init_vector (stmt
, op1
,
4453 TREE_TYPE (vectype
), NULL
);
4460 /* Supportable by target? */
4463 if (dump_enabled_p ())
4464 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4468 vec_mode
= TYPE_MODE (vectype
);
4469 icode
= (int) optab_handler (optab
, vec_mode
);
4470 if (icode
== CODE_FOR_nothing
)
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4474 "op not supported by target.\n");
4475 /* Check only during analysis. */
4476 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4477 || (vf
< vect_min_worthwhile_factor (code
)
4480 if (dump_enabled_p ())
4481 dump_printf_loc (MSG_NOTE
, vect_location
,
4482 "proceeding using word mode.\n");
4485 /* Worthwhile without SIMD support? Check only during analysis. */
4486 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4487 && vf
< vect_min_worthwhile_factor (code
)
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4492 "not worthwhile without SIMD support.\n");
4496 if (!vec_stmt
) /* transformation not required. */
4498 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_NOTE
, vect_location
,
4501 "=== vectorizable_shift ===\n");
4502 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE
, vect_location
,
4510 "transform binary/unary operation.\n");
4513 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4515 prev_stmt_info
= NULL
;
4516 for (j
= 0; j
< ncopies
; j
++)
4521 if (scalar_shift_arg
)
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
4527 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4528 if (!VECTOR_MODE_P (optab_op2_mode
))
4530 if (dump_enabled_p ())
4531 dump_printf_loc (MSG_NOTE
, vect_location
,
4532 "operand 1 using scalar mode.\n");
4534 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4535 vec_oprnds1
.quick_push (vec_oprnd1
);
4538 /* Store vec_oprnd1 for every vector stmt to be created
4539 for SLP_NODE. We check during the analysis that all
4540 the shift arguments are the same.
4541 TODO: Allow different constants for different vector
4542 stmts generated for an SLP instance. */
4543 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4544 vec_oprnds1
.quick_push (vec_oprnd1
);
4549 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4550 (a special case for certain kind of vector shifts); otherwise,
4551 operand 1 should be of a vector type (the usual case). */
4553 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4556 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4560 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4562 /* Arguments are ready. Create the new vector stmt. */
4563 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4565 vop1
= vec_oprnds1
[i
];
4566 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4567 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4568 gimple_assign_set_lhs (new_stmt
, new_temp
);
4569 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4571 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4578 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4580 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4581 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4584 vec_oprnds0
.release ();
4585 vec_oprnds1
.release ();
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4600 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4601 gimple
*vec_stmt
, slp_tree slp_node
)
4605 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4606 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4608 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4609 enum tree_code code
;
4610 machine_mode vec_mode
;
4617 enum vect_def_type dt
[3]
4618 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4619 gimple new_stmt
= NULL
;
4620 stmt_vec_info prev_stmt_info
;
4626 vec
<tree
> vec_oprnds0
= vNULL
;
4627 vec
<tree
> vec_oprnds1
= vNULL
;
4628 vec
<tree
> vec_oprnds2
= vNULL
;
4629 tree vop0
, vop1
, vop2
;
4630 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4636 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4639 /* Is STMT a vectorizable binary/unary operation? */
4640 if (!is_gimple_assign (stmt
))
4643 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4646 code
= gimple_assign_rhs_code (stmt
);
4648 /* For pointer addition, we should use the normal plus for
4649 the vector addition. */
4650 if (code
== POINTER_PLUS_EXPR
)
4653 /* Support only unary or binary operations. */
4654 op_type
= TREE_CODE_LENGTH (code
);
4655 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4657 if (dump_enabled_p ())
4658 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4659 "num. args = %d (not unary/binary/ternary op).\n",
4664 scalar_dest
= gimple_assign_lhs (stmt
);
4665 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
4669 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4670 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4671 /* Exception are bitwise binary operations. */
4672 && code
!= BIT_IOR_EXPR
4673 && code
!= BIT_XOR_EXPR
4674 && code
!= BIT_AND_EXPR
)
4676 if (dump_enabled_p ())
4677 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4678 "bit-precision arithmetic not supported.\n");
4682 op0
= gimple_assign_rhs1 (stmt
);
4683 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4684 &def_stmt
, &def
, &dt
[0], &vectype
))
4686 if (dump_enabled_p ())
4687 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4688 "use not simple.\n");
4691 /* If op0 is an external or constant def use a vector type with
4692 the same size as the output vector type. */
4694 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4696 gcc_assert (vectype
);
4699 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4702 "no vectype for scalar type ");
4703 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4705 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4711 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4712 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4713 if (nunits_out
!= nunits_in
)
4716 if (op_type
== binary_op
|| op_type
== ternary_op
)
4718 op1
= gimple_assign_rhs2 (stmt
);
4719 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4724 "use not simple.\n");
4728 if (op_type
== ternary_op
)
4730 op2
= gimple_assign_rhs3 (stmt
);
4731 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4734 if (dump_enabled_p ())
4735 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4736 "use not simple.\n");
4742 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754 gcc_assert (ncopies
>= 1);
4756 /* Shifts are handled in vectorizable_shift (). */
4757 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4758 || code
== RROTATE_EXPR
)
4761 /* Supportable by target? */
4763 vec_mode
= TYPE_MODE (vectype
);
4764 if (code
== MULT_HIGHPART_EXPR
)
4766 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4767 icode
= LAST_INSN_CODE
;
4769 icode
= CODE_FOR_nothing
;
4773 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4781 icode
= (int) optab_handler (optab
, vec_mode
);
4784 if (icode
== CODE_FOR_nothing
)
4786 if (dump_enabled_p ())
4787 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4788 "op not supported by target.\n");
4789 /* Check only during analysis. */
4790 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4791 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_NOTE
, vect_location
,
4795 "proceeding using word mode.\n");
4798 /* Worthwhile without SIMD support? Check only during analysis. */
4799 if (!VECTOR_MODE_P (vec_mode
)
4801 && vf
< vect_min_worthwhile_factor (code
))
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4805 "not worthwhile without SIMD support.\n");
4809 if (!vec_stmt
) /* transformation not required. */
4811 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_NOTE
, vect_location
,
4814 "=== vectorizable_operation ===\n");
4815 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE
, vect_location
,
4823 "transform binary/unary operation.\n");
4826 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.

        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
4881 prev_stmt_info
= NULL
;
4882 for (j
= 0; j
< ncopies
; j
++)
4887 if (op_type
== binary_op
|| op_type
== ternary_op
)
4888 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4891 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4893 if (op_type
== ternary_op
)
4895 vec_oprnds2
.create (1);
4896 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4903 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4904 if (op_type
== ternary_op
)
4906 tree vec_oprnd
= vec_oprnds2
.pop ();
4907 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4912 /* Arguments are ready. Create the new vector stmt. */
4913 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4915 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4916 ? vec_oprnds1
[i
] : NULL_TREE
);
4917 vop2
= ((op_type
== ternary_op
)
4918 ? vec_oprnds2
[i
] : NULL_TREE
);
4919 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
4920 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4921 gimple_assign_set_lhs (new_stmt
, new_temp
);
4922 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4924 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4931 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4933 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4934 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4937 vec_oprnds0
.release ();
4938 vec_oprnds1
.release ();
4939 vec_oprnds2
.release ();
4944 /* A helper function to ensure data reference DR's base alignment
4948 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4953 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4955 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4956 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4958 if (decl_in_symtab_p (base_decl
))
4959 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
4962 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4963 DECL_USER_ALIGN (base_decl
) = 1;
4965 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
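/* Illustrative effect (assumed example): if the loop accesses a global
   "int a[1024]" whose declared alignment is only 4 bytes, raising
   DECL_ALIGN to TYPE_ALIGN of the chosen vectype (e.g. 16 bytes for V4SI)
   lets the generated vector loads and stores use aligned accesses instead
   of peeling or unaligned moves.  */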
/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
4980 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4981 sel
= XALLOCAVEC (unsigned char, nunits
);
4983 for (i
= 0; i
< nunits
; ++i
)
4984 sel
[i
] = nunits
- 1 - i
;
4986 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4988 return vect_gen_perm_mask_checked (vectype
, sel
);
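/* For example (illustrative): for a V4SI vector the reversal selector is
   {3, 2, 1, 0}, so the resulting mask makes
   VEC_PERM_EXPR <v, v, {3, 2, 1, 0}> return the elements of v in reverse
   order.  */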
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5000 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5006 tree vec_oprnd
= NULL_TREE
;
5007 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5008 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5009 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5011 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5012 struct loop
*loop
= NULL
;
5013 machine_mode vec_mode
;
5015 enum dr_alignment_support alignment_support_scheme
;
5018 enum vect_def_type dt
;
5019 stmt_vec_info prev_stmt_info
= NULL
;
5020 tree dataref_ptr
= NULL_TREE
;
5021 tree dataref_offset
= NULL_TREE
;
5022 gimple ptr_incr
= NULL
;
5023 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5026 gimple next_stmt
, first_stmt
= NULL
;
5027 bool grouped_store
= false;
5028 bool store_lanes_p
= false;
5029 unsigned int group_size
, i
;
5030 vec
<tree
> dr_chain
= vNULL
;
5031 vec
<tree
> oprnds
= vNULL
;
5032 vec
<tree
> result_chain
= vNULL
;
5034 bool negative
= false;
5035 tree offset
= NULL_TREE
;
5036 vec
<tree
> vec_oprnds
= vNULL
;
5037 bool slp
= (slp_node
!= NULL
);
5038 unsigned int vec_num
;
5039 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5043 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053 gcc_assert (ncopies
>= 1);
5055 /* FORNOW. This restriction should be relaxed. */
5056 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5060 "multiple types in nested loop.\n");
5064 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5067 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5070 /* Is vectorizable store? */
5072 if (!is_gimple_assign (stmt
))
5075 scalar_dest
= gimple_assign_lhs (stmt
);
5076 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5077 && is_pattern_stmt_p (stmt_info
))
5078 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5079 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5080 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5081 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5082 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5083 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5084 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5085 && TREE_CODE (scalar_dest
) != MEM_REF
)
5088 gcc_assert (gimple_assign_single_p (stmt
));
5089 op
= gimple_assign_rhs1 (stmt
);
5090 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5095 "use not simple.\n");
5099 elem_type
= TREE_TYPE (vectype
);
5100 vec_mode
= TYPE_MODE (vectype
);
5102 /* FORNOW. In some cases can vectorize even if data-type not supported
5103 (e.g. - array initialization with 0). */
5104 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5107 if (!STMT_VINFO_DATA_REF (stmt_info
))
5110 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5113 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5114 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5115 size_zero_node
) < 0;
5116 if (negative
&& ncopies
> 1)
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5120 "multiple types with negative step.\n");
5125 gcc_assert (!grouped_store
);
5126 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5127 if (alignment_support_scheme
!= dr_aligned
5128 && alignment_support_scheme
!= dr_unaligned_supported
)
5130 if (dump_enabled_p ())
5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5132 "negative step but alignment required.\n");
5135 if (dt
!= vect_constant_def
5136 && dt
!= vect_external_def
5137 && !perm_mask_for_reverse (vectype
))
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5141 "negative step and reversing not supported.\n");
5147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5149 grouped_store
= true;
5150 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5151 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5153 && !PURE_SLP_STMT (stmt_info
)
5154 && !STMT_VINFO_STRIDED_P (stmt_info
))
5156 if (vect_store_lanes_supported (vectype
, group_size
))
5157 store_lanes_p
= true;
5158 else if (!vect_grouped_store_supported (vectype
, group_size
))
5162 if (STMT_VINFO_STRIDED_P (stmt_info
)
5163 && (slp
|| PURE_SLP_STMT (stmt_info
))
5164 && (group_size
> nunits
5165 || nunits
% group_size
!= 0))
5167 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5168 "unhandled strided group store\n");
5172 if (first_stmt
== stmt
)
5174 /* STMT is the leader of the group. Check the operands of all the
5175 stmts of the group. */
5176 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5179 gcc_assert (gimple_assign_single_p (next_stmt
));
5180 op
= gimple_assign_rhs1 (next_stmt
);
5181 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5182 &def_stmt
, &def
, &dt
))
5184 if (dump_enabled_p ())
5185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5186 "use not simple.\n");
5189 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5194 if (!vec_stmt
) /* transformation not required. */
5196 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5197 /* The SLP costs are calculated during SLP analysis. */
5198 if (!PURE_SLP_STMT (stmt_info
))
5199 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5206 ensure_base_align (stmt_info
, dr
);
5210 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5211 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5213 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5216 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5218 /* We vectorize all the stmts of the interleaving group when we
5219 reach the last stmt in the group. */
5220 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5221 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5230 grouped_store
= false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
5233 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5234 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5235 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5236 op
= gimple_assign_rhs1 (first_stmt
);
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
5241 vec_num
= group_size
;
5247 group_size
= vec_num
= 1;
5250 if (dump_enabled_p ())
5251 dump_printf_loc (MSG_NOTE
, vect_location
,
5252 "transform store. ncopies = %d\n", ncopies
);
5254 if (STMT_VINFO_STRIDED_P (stmt_info
))
5256 gimple_stmt_iterator incr_gsi
;
5262 gimple_seq stmts
= NULL
;
5263 tree stride_base
, stride_step
, alias_off
;
5267 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5270 = fold_build_pointer_plus
5271 (unshare_expr (DR_BASE_ADDRESS (first_dr
)),
5272 size_binop (PLUS_EXPR
,
5273 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr
))),
5274 convert_to_ptrofftype (DR_INIT(first_dr
))));
5275 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (first_dr
)));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
       */
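      /* A concrete instance (values assumed for illustration): with
	 stride == 3 and a 4-element vectype, each rhs vector is scattered
	 into array[j], array[j + 3], array[j + 6] and array[j + 9], and the
	 induction variable j then advances by 4*3 elements per vector
	 iteration.  */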
5295 unsigned nstores
= nunits
;
5296 tree ltype
= elem_type
;
5299 nstores
= nunits
/ group_size
;
5300 if (group_size
< nunits
)
5301 ltype
= build_vector_type (elem_type
, group_size
);
5304 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
5305 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5309 ivstep
= stride_step
;
5310 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5311 build_int_cst (TREE_TYPE (ivstep
),
5312 ncopies
* nstores
));
5314 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5316 create_iv (stride_base
, ivstep
, NULL
,
5317 loop
, &incr_gsi
, insert_after
,
5319 incr
= gsi_stmt (incr_gsi
);
5320 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
5322 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5324 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5326 prev_stmt_info
= NULL
;
5327 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
5328 next_stmt
= first_stmt
;
5329 for (g
= 0; g
< group_size
; g
++)
5331 running_off
= offvar
;
5334 tree size
= TYPE_SIZE_UNIT (ltype
);
5335 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
5337 tree newoff
= copy_ssa_name (running_off
, NULL
);
5338 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5340 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5341 running_off
= newoff
;
5343 for (j
= 0; j
< ncopies
; j
++)
5345 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5346 and first_stmt == stmt. */
5351 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
, NULL
,
5353 vec_oprnd
= vec_oprnds
[0];
5357 gcc_assert (gimple_assign_single_p (next_stmt
));
5358 op
= gimple_assign_rhs1 (next_stmt
);
5359 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5366 vec_oprnd
= vec_oprnds
[j
];
5369 vect_is_simple_use (vec_oprnd
, NULL
, loop_vinfo
,
5370 bb_vinfo
, &def_stmt
, &def
, &dt
);
5371 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5375 for (i
= 0; i
< nstores
; i
++)
5377 tree newref
, newoff
;
5378 gimple incr
, assign
;
5379 tree size
= TYPE_SIZE (ltype
);
5380 /* Extract the i'th component. */
5381 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
5382 bitsize_int (i
), size
);
5383 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
5386 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5390 newref
= build2 (MEM_REF
, ltype
,
5391 running_off
, alias_off
);
5393 /* And store it to *running_off. */
5394 assign
= gimple_build_assign (newref
, elem
);
5395 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5397 newoff
= copy_ssa_name (running_off
, NULL
);
5398 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5399 running_off
, stride_step
);
5400 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5402 running_off
= newoff
;
5403 if (g
== group_size
- 1
5406 if (j
== 0 && i
== 0)
5407 STMT_VINFO_VEC_STMT (stmt_info
)
5408 = *vec_stmt
= assign
;
5410 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5411 prev_stmt_info
= vinfo_for_stmt (assign
);
5415 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5420 dr_chain
.create (group_size
);
5421 oprnds
.create (group_size
);
5423 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5424 gcc_assert (alignment_support_scheme
);
5425 /* Targets with store-lane instructions must not require explicit
5427 gcc_assert (!store_lanes_p
5428 || alignment_support_scheme
== dr_aligned
5429 || alignment_support_scheme
== dr_unaligned_supported
);
5432 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5435 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5437 aggr_type
= vectype
;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
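  /* A source-level sketch of such an interleaving group (illustrative,
     not taken from the original sources):

       for (i = 0; i < n; i++)
	 {
	   a[2*i]     = x[i];
	   a[2*i + 1] = y[i];
	 }

     Both stores belong to one chain of group size 2; they are vectorized
     only when the last of them is reached, and vect_permute_store_chain
     interleaves the vectorized x and y values before the wide stores to
     &a[2*i] are emitted.  */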
5478 prev_stmt_info
= NULL
;
5479 for (j
= 0; j
< ncopies
; j
++)
5487 /* Get vectorized arguments for SLP_NODE. */
5488 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5489 NULL
, slp_node
, -1);
5491 vec_oprnd
= vec_oprnds
[0];
5495 /* For interleaved stores we collect vectorized defs for all the
5496 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5497 used as an input to vect_permute_store_chain(), and OPRNDS as
5498 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5500 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5501 OPRNDS are of size 1. */
5502 next_stmt
= first_stmt
;
5503 for (i
= 0; i
< group_size
; i
++)
5505 /* Since gaps are not supported for interleaved stores,
5506 GROUP_SIZE is the exact number of stmts in the chain.
5507 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5508 there is no interleaving, GROUP_SIZE is 1, and only one
5509 iteration of the loop will be executed. */
5510 gcc_assert (next_stmt
5511 && gimple_assign_single_p (next_stmt
));
5512 op
= gimple_assign_rhs1 (next_stmt
);
5514 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5516 dr_chain
.quick_push (vec_oprnd
);
5517 oprnds
.quick_push (vec_oprnd
);
5518 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
	  /* We should have caught mismatched types earlier.  */
5523 gcc_assert (useless_type_conversion_p (vectype
,
5524 TREE_TYPE (vec_oprnd
)));
5525 bool simd_lane_access_p
5526 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5527 if (simd_lane_access_p
5528 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5529 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5530 && integer_zerop (DR_OFFSET (first_dr
))
5531 && integer_zerop (DR_INIT (first_dr
))
5532 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5533 get_alias_set (DR_REF (first_dr
))))
5535 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5536 dataref_offset
= build_int_cst (reference_alias_ptr_type
5537 (DR_REF (first_dr
)), 0);
5542 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5543 simd_lane_access_p
? loop
: NULL
,
5544 offset
, &dummy
, gsi
, &ptr_incr
,
5545 simd_lane_access_p
, &inv_p
);
5546 gcc_assert (bb_vinfo
|| !inv_p
);
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.
	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
5557 for (i
= 0; i
< group_size
; i
++)
5560 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5562 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5563 dr_chain
[i
] = vec_oprnd
;
5564 oprnds
[i
] = vec_oprnd
;
5568 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5569 TYPE_SIZE_UNIT (aggr_type
));
5571 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5572 TYPE_SIZE_UNIT (aggr_type
));
5579 /* Combine all the vectors into an array. */
5580 vec_array
= create_vector_array (vectype
, vec_num
);
5581 for (i
= 0; i
< vec_num
; i
++)
5583 vec_oprnd
= dr_chain
[i
];
5584 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5588 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5589 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5590 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5591 gimple_call_set_lhs (new_stmt
, data_ref
);
5592 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5600 result_chain
.create (group_size
);
5602 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5606 next_stmt
= first_stmt
;
5607 for (i
= 0; i
< vec_num
; i
++)
5609 unsigned align
, misalign
;
5612 /* Bump the vector pointer. */
5613 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5617 vec_oprnd
= vec_oprnds
[i
];
5618 else if (grouped_store
)
5619 /* For grouped stores vectorized defs are interleaved in
5620 vect_permute_store_chain(). */
5621 vec_oprnd
= result_chain
[i
];
5623 data_ref
= fold_build2 (MEM_REF
, TREE_TYPE (vec_oprnd
),
5627 : build_int_cst (reference_alias_ptr_type
5628 (DR_REF (first_dr
)), 0));
5629 align
= TYPE_ALIGN_UNIT (vectype
);
5630 if (aligned_access_p (first_dr
))
5632 else if (DR_MISALIGNMENT (first_dr
) == -1)
5634 TREE_TYPE (data_ref
)
5635 = build_aligned_type (TREE_TYPE (data_ref
),
5636 TYPE_ALIGN (elem_type
));
5637 align
= TYPE_ALIGN_UNIT (elem_type
);
5642 TREE_TYPE (data_ref
)
5643 = build_aligned_type (TREE_TYPE (data_ref
),
5644 TYPE_ALIGN (elem_type
));
5645 misalign
= DR_MISALIGNMENT (first_dr
);
5647 if (dataref_offset
== NULL_TREE
5648 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
5649 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5653 && dt
!= vect_constant_def
5654 && dt
!= vect_external_def
)
5656 tree perm_mask
= perm_mask_for_reverse (vectype
);
5658 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5660 tree new_temp
= make_ssa_name (perm_dest
);
5662 /* Generate the permute statement. */
5664 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5665 vec_oprnd
, perm_mask
);
5666 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5668 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5669 vec_oprnd
= new_temp
;
5672 /* Arguments are ready. Create the new vector stmt. */
5673 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5674 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5679 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5687 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5689 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5690 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5694 dr_chain
.release ();
5696 result_chain
.release ();
5697 vec_oprnds
.release ();
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */
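/* Illustrative use (assumed, not from the original sources): to build an
   even/odd interleaving mask for a 4-element vector type one would pass

     unsigned char sel[4] = { 0, 4, 1, 5 };
     tree mask = vect_gen_perm_mask_any (vectype, sel);

   which yields the VECTOR_CST {0, 4, 1, 5} whose element type is the
   integer type matching the vector's element mode.  */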
5708 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5710 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5713 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5715 mask_elt_type
= lang_hooks
.types
.type_for_mode
5716 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5717 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5719 mask_elts
= XALLOCAVEC (tree
, nunits
);
5720 for (i
= nunits
- 1; i
>= 0; i
--)
5721 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5722 mask_vec
= build_vector (mask_type
, mask_elts
);
5727 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5728 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5731 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5733 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5734 return vect_gen_perm_mask_any (vectype
, sel
);
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
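/* For example (illustrative): with X = {x0, x1, x2, x3},
   Y = {y0, y1, y2, y3} and MASK_VEC = {0, 4, 1, 5}, the emitted
   VEC_PERM_EXPR produces {x0, y0, x1, y1}.  */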
5743 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5744 gimple_stmt_iterator
*gsi
)
5746 tree vectype
= TREE_TYPE (x
);
5747 tree perm_dest
, data_ref
;
5750 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5751 data_ref
= make_ssa_name (perm_dest
);
5753 /* Generate the permute statement. */
5754 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
5755 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can then be moved),
   otherwise returns false.  */
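/* Illustrative case (assumed): if STMT is an invariant load "x = *q" whose
   address q is computed by "q = p + 4" inside LOOP, that defining statement
   is moved to the preheader first, so that STMT itself can subsequently be
   hoisted out of the loop.  */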
5766 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5772 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5774 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5775 if (!gimple_nop_p (def_stmt
)
5776 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases, when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
5784 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5786 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5788 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5789 if (!gimple_nop_p (def_stmt2
)
5790 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5800 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5802 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5803 if (!gimple_nop_p (def_stmt
)
5804 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5806 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5807 gsi_remove (&gsi
, false);
5808 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5824 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5825 slp_tree slp_node
, slp_instance slp_node_instance
)
5828 tree vec_dest
= NULL
;
5829 tree data_ref
= NULL
;
5830 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5831 stmt_vec_info prev_stmt_info
;
5832 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5833 struct loop
*loop
= NULL
;
5834 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5835 bool nested_in_vect_loop
= false;
5836 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5837 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5841 gimple new_stmt
= NULL
;
5843 enum dr_alignment_support alignment_support_scheme
;
5844 tree dataref_ptr
= NULL_TREE
;
5845 tree dataref_offset
= NULL_TREE
;
5846 gimple ptr_incr
= NULL
;
5847 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5849 int i
, j
, group_size
= -1, group_gap_adj
;
5850 tree msq
= NULL_TREE
, lsq
;
5851 tree offset
= NULL_TREE
;
5852 tree byte_offset
= NULL_TREE
;
5853 tree realignment_token
= NULL_TREE
;
5855 vec
<tree
> dr_chain
= vNULL
;
5856 bool grouped_load
= false;
5857 bool load_lanes_p
= false;
5860 bool negative
= false;
5861 bool compute_in_loop
= false;
5862 struct loop
*at_loop
;
5864 bool slp
= (slp_node
!= NULL
);
5865 bool slp_perm
= false;
5866 enum tree_code code
;
5867 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5870 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5871 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5872 int gather_scale
= 1;
5873 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5877 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5878 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5879 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5892 gcc_assert (ncopies
>= 1);
5894 /* FORNOW. This restriction should be relaxed. */
5895 if (nested_in_vect_loop
&& ncopies
> 1)
5897 if (dump_enabled_p ())
5898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5899 "multiple types in nested loop.\n");
5903 /* Invalidate assumptions made by dependence analysis when vectorization
5904 on the unrolled body effectively re-orders stmts. */
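  /* For example (illustrative, numbers assumed): if the smallest negative
     dependence distance recorded for this access is 2
     (STMT_VINFO_MIN_NEG_DIST == 2), unrolling by a vectorization factor of
     4 would reorder statements farther apart than dependence analysis
     accounted for, so vectorization is refused below.  */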
5906 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5907 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5908 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5912 "cannot perform implicit CSE when unrolling "
5913 "with negative dependence distance\n");
5917 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5920 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5923 /* Is vectorizable load? */
5924 if (!is_gimple_assign (stmt
))
5927 scalar_dest
= gimple_assign_lhs (stmt
);
5928 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5931 code
= gimple_assign_rhs_code (stmt
);
5932 if (code
!= ARRAY_REF
5933 && code
!= BIT_FIELD_REF
5934 && code
!= INDIRECT_REF
5935 && code
!= COMPONENT_REF
5936 && code
!= IMAGPART_EXPR
5937 && code
!= REALPART_EXPR
5939 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5942 if (!STMT_VINFO_DATA_REF (stmt_info
))
5945 elem_type
= TREE_TYPE (vectype
);
5946 mode
= TYPE_MODE (vectype
);
5948 /* FORNOW. In some cases can vectorize even if data-type not supported
5949 (e.g. - data copies). */
5950 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5954 "Aligned load, but unsupported type.\n");
5958 /* Check if the load is a part of an interleaving chain. */
5959 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5961 grouped_load
= true;
5963 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5965 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5967 /* If this is single-element interleaving with an element distance
5968 that leaves unused vector loads around punt - we at least create
5969 very sub-optimal code in that case (and blow up memory,
5971 if (first_stmt
== stmt
5972 && !GROUP_NEXT_ELEMENT (stmt_info
)
5973 && GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
5975 if (dump_enabled_p ())
5976 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5977 "single-element interleaving not supported "
5978 "for not adjacent vector loads\n");
5982 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
5985 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5987 && !PURE_SLP_STMT (stmt_info
)
5988 && !STMT_VINFO_STRIDED_P (stmt_info
))
5990 if (vect_load_lanes_supported (vectype
, group_size
))
5991 load_lanes_p
= true;
5992 else if (!vect_grouped_load_supported (vectype
, group_size
))
5996 /* Invalidate assumptions made by dependence analysis when vectorization
5997 on the unrolled body effectively re-orders stmts. */
5998 if (!PURE_SLP_STMT (stmt_info
)
5999 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
6000 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
6001 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
6003 if (dump_enabled_p ())
6004 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6005 "cannot perform implicit CSE when performing "
6006 "group loads with negative dependence distance\n");
6010 /* Similarly when the stmt is a load that is both part of a SLP
6011 instance and a loop vectorized stmt via the same-dr mechanism
6012 we have to give up. */
6013 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
6014 && (STMT_SLP_TYPE (stmt_info
)
6015 != STMT_SLP_TYPE (vinfo_for_stmt
6016 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
6018 if (dump_enabled_p ())
6019 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6020 "conflicting SLP types for CSEd load\n");
6026 if (STMT_VINFO_GATHER_P (stmt_info
))
6030 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
6031 &gather_off
, &gather_scale
);
6032 gcc_assert (gather_decl
);
6033 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
6034 &def_stmt
, &def
, &gather_dt
,
6035 &gather_off_vectype
))
6037 if (dump_enabled_p ())
6038 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6039 "gather index use not simple.\n");
6043 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6046 && (slp
|| PURE_SLP_STMT (stmt_info
)))
6047 && (group_size
> nunits
6048 || nunits
% group_size
!= 0))
6050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6051 "unhandled strided group load\n");
6057 negative
= tree_int_cst_compare (nested_in_vect_loop
6058 ? STMT_VINFO_DR_STEP (stmt_info
)
6060 size_zero_node
) < 0;
6061 if (negative
&& ncopies
> 1)
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6065 "multiple types with negative step.\n");
6073 if (dump_enabled_p ())
6074 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6075 "negative step for group load not supported"
6079 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6080 if (alignment_support_scheme
!= dr_aligned
6081 && alignment_support_scheme
!= dr_unaligned_supported
)
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6085 "negative step but alignment required.\n");
6088 if (!perm_mask_for_reverse (vectype
))
6090 if (dump_enabled_p ())
6091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6092 "negative step and reversing not supported."
6099 if (!vec_stmt
) /* transformation not required. */
6101 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6102 /* The SLP costs are calculated during SLP analysis. */
6103 if (!PURE_SLP_STMT (stmt_info
))
6104 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6109 if (dump_enabled_p ())
6110 dump_printf_loc (MSG_NOTE
, vect_location
,
6111 "transform load. ncopies = %d\n", ncopies
);
6115 ensure_base_align (stmt_info
, dr
);
6117 if (STMT_VINFO_GATHER_P (stmt_info
))
6119 tree vec_oprnd0
= NULL_TREE
, op
;
6120 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6121 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6122 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6123 edge pe
= loop_preheader_edge (loop
);
6126 enum { NARROW
, NONE
, WIDEN
} modifier
;
6127 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6129 if (nunits
== gather_off_nunits
)
6131 else if (nunits
== gather_off_nunits
/ 2)
6133 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6136 for (i
= 0; i
< gather_off_nunits
; ++i
)
6137 sel
[i
] = i
| nunits
;
6139 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6141 else if (nunits
== gather_off_nunits
* 2)
6143 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6146 for (i
= 0; i
< nunits
; ++i
)
6147 sel
[i
] = i
< gather_off_nunits
6148 ? i
: i
+ nunits
- gather_off_nunits
;
6150 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6156 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6157 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6158 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6159 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6160 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6161 scaletype
= TREE_VALUE (arglist
);
6162 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6164 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6166 ptr
= fold_convert (ptrtype
, gather_base
);
6167 if (!is_gimple_min_invariant (ptr
))
6169 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6170 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6171 gcc_assert (!new_bb
);
6174 /* Currently we support only unconditional gather loads,
6175 so mask should be all ones. */
6176 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6177 mask
= build_int_cst (masktype
, -1);
6178 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6180 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6181 mask
= build_vector_from_val (masktype
, mask
);
6182 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6184 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6188 for (j
= 0; j
< 6; ++j
)
6190 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6191 mask
= build_real (TREE_TYPE (masktype
), r
);
6192 mask
= build_vector_from_val (masktype
, mask
);
6193 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6198 scale
= build_int_cst (scaletype
, gather_scale
);
6200 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6201 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6202 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6206 for (j
= 0; j
< 6; ++j
)
6208 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6209 merge
= build_real (TREE_TYPE (rettype
), r
);
6213 merge
= build_vector_from_val (rettype
, merge
);
6214 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6216 prev_stmt_info
= NULL
;
6217 for (j
= 0; j
< ncopies
; ++j
)
6219 if (modifier
== WIDEN
&& (j
& 1))
6220 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6221 perm_mask
, stmt
, gsi
);
6224 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6227 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6229 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6231 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6232 == TYPE_VECTOR_SUBPARTS (idxtype
));
6233 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6234 var
= make_ssa_name (var
);
6235 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6237 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6238 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6243 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6245 if (!useless_type_conversion_p (vectype
, rettype
))
6247 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6248 == TYPE_VECTOR_SUBPARTS (rettype
));
6249 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6250 op
= make_ssa_name (var
, new_stmt
);
6251 gimple_call_set_lhs (new_stmt
, op
);
6252 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6253 var
= make_ssa_name (vec_dest
);
6254 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6256 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6260 var
= make_ssa_name (vec_dest
, new_stmt
);
6261 gimple_call_set_lhs (new_stmt
, var
);
6264 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6266 if (modifier
== NARROW
)
6273 var
= permute_vec_elements (prev_res
, var
,
6274 perm_mask
, stmt
, gsi
);
6275 new_stmt
= SSA_NAME_DEF_STMT (var
);
6278 if (prev_stmt_info
== NULL
)
6279 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6281 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6282 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6286 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6288 gimple_stmt_iterator incr_gsi
;
6294 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6295 gimple_seq stmts
= NULL
;
6296 tree stride_base
, stride_step
, alias_off
;
6298 gcc_assert (!nested_in_vect_loop
);
6300 if (slp
&& grouped_load
)
6301 first_dr
= STMT_VINFO_DATA_REF
6302 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
6307 = fold_build_pointer_plus
6308 (DR_BASE_ADDRESS (first_dr
),
6309 size_binop (PLUS_EXPR
,
6310 convert_to_ptrofftype (DR_OFFSET (first_dr
)),
6311 convert_to_ptrofftype (DR_INIT (first_dr
))));
6312 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr
));
	  /* For a load with loop-invariant (but other than power-of-2)
	     stride (i.e. not a grouped access) like so:

	       for (i = 0; i < n; i += stride)
		 ... = array[i];

	     we generate a new induction variable and new accesses to
	     form a new vector (or vectors, depending on ncopies):

	       for (j = 0; ; j += VF*stride)
		 tmp1 = array[j];
		 tmp2 = array[j + stride];
		 ...
		 vectemp = {tmp1, tmp2, ...}
		 ...  */
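	  /* Illustrative sketch only (hypothetical values, not the exact
	     GIMPLE the vectorizer emits): with nunits == 4 and a scalar
	     stride of 3, one result vector is assembled from four strided
	     scalar loads per vector iteration:

	       for (j = 0; ; j += 4*3)
		 {
		   tmp0 = array[j + 0*3];
		   tmp1 = array[j + 1*3];
		   tmp2 = array[j + 2*3];
		   tmp3 = array[j + 3*3];
		   vectemp = {tmp0, tmp1, tmp2, tmp3};
		   ...
		 }  */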
6330 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
6331 build_int_cst (TREE_TYPE (stride_step
), vf
));
6333 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6335 create_iv (unshare_expr (stride_base
), unshare_expr (ivstep
), NULL
,
6336 loop
, &incr_gsi
, insert_after
,
6338 incr
= gsi_stmt (incr_gsi
);
6339 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6341 stride_step
= force_gimple_operand (unshare_expr (stride_step
),
6342 &stmts
, true, NULL_TREE
);
6344 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6346 prev_stmt_info
= NULL
;
6347 running_off
= offvar
;
6348 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (first_dr
)), 0);
6349 int nloads
= nunits
;
6350 tree ltype
= TREE_TYPE (vectype
);
6351 auto_vec
<tree
> dr_chain
;
6354 nloads
= nunits
/ group_size
;
6355 if (group_size
< nunits
)
6356 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6359 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6360 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6362 dr_chain
.create (ncopies
);
6364 for (j
= 0; j
< ncopies
; j
++)
6370 vec_alloc (v
, nloads
);
6371 for (i
= 0; i
< nloads
; i
++)
6373 tree newref
, newoff
;
6375 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6377 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6380 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6381 newoff
= copy_ssa_name (running_off
);
6382 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6383 running_off
, stride_step
);
6384 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6386 running_off
= newoff
;
6389 vec_inv
= build_constructor (vectype
, v
);
6390 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6391 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6395 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6396 build2 (MEM_REF
, ltype
,
6397 running_off
, alias_off
));
6398 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6400 tree newoff
= copy_ssa_name (running_off
);
6401 gimple incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6402 running_off
, stride_step
);
6403 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6405 running_off
= newoff
;
6410 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6412 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
6417 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6419 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6420 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6424 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6425 slp_node_instance
, false);
6431 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6433 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6434 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6435 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
6449 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6452 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6453 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6456 /* VEC_NUM is the number of vect stmts to be created for this group. */
6459 grouped_load
= false;
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
6464 vec_num
= (group_size
* vf
+ nunits
- 1) / nunits
;
6466 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6467 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
6470 vec_num
= group_size
;
6476 group_size
= vec_num
= 1;
6480 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6481 gcc_assert (alignment_support_scheme
);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
6484 gcc_assert (!load_lanes_p
6485 || alignment_support_scheme
== dr_aligned
6486 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */
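  /* A minimal sketch of how a later stage would walk this chain (purely
     illustrative, assuming the accessors named above; not code from this
     function):

       gimple vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (scalar_stmt));
       while (vs)
	 {
	   ... use the vectorized copy VS ...
	   vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));
	 }  */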
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
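  /* Illustrative example (hypothetical, nunits == 4 and group_size == 2),
     loading the interleaved pair a[2*i] / a[2*i+1]:

       vector loads, in memory order:
	 vx0 = *(&a[0]);        = {a0, a1, a2, a3}
	 vx1 = *(&a[0] + 4);    = {a4, a5, a6, a7}

       permutes that de-interleave them:
	 vy0 = VEC_PERM_EXPR <vx0, vx1, {0, 2, 4, 6}>;  = {a0, a2, a4, a6}
	 vy1 = VEC_PERM_EXPR <vx0, vx1, {1, 3, 5, 7}>;  = {a1, a3, a5, a7}  */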
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

	 p = initial_addr;
	 indx = 0;
	 loop {
	   p = p + indx * vectype_size;
	   vec_dest = *(p);
	   indx = indx + 1;
	 }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }  */
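  /* Illustrative numbers only (hypothetical, 16-byte vectors, first access
     at base + 4): floor(p1) == base, so msq covers bytes [base, base+16);
     p2 == p1 + 15, floor(p2) == base + 16, so lsq covers
     [base+16, base+32); realign_load then extracts the misaligned vector
     [base+4, base+20) from the two aligned loads.  */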
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
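  /* Illustrative example (hypothetical loop nest): when outer-loop
     vectorizing

       for (i = 0; i < n; i++)      <-- vectorized loop
	 for (j = 0; j < m; j++)
	   ... = a[i][j];

     the misalignment of &a[i][0] changes with i whenever the row size is
     not a multiple of the vector size, so the realignment data cannot be
     computed once in the preheader and must be recomputed in the loop.  */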
6591 if (nested_in_vect_loop
6592 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6593 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6595 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6596 compute_in_loop
= true;
6599 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6600 || alignment_support_scheme
== dr_explicit_realign
)
6601 && !compute_in_loop
)
6603 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6604 alignment_support_scheme
, NULL_TREE
,
6606 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6608 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6609 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6617 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6620 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6622 aggr_type
= vectype
;
6624 prev_stmt_info
= NULL
;
6625 for (j
= 0; j
< ncopies
; j
++)
6627 /* 1. Create the vector or array pointer update chain. */
6630 bool simd_lane_access_p
6631 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6632 if (simd_lane_access_p
6633 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6634 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6635 && integer_zerop (DR_OFFSET (first_dr
))
6636 && integer_zerop (DR_INIT (first_dr
))
6637 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6638 get_alias_set (DR_REF (first_dr
)))
6639 && (alignment_support_scheme
== dr_aligned
6640 || alignment_support_scheme
== dr_unaligned_supported
))
6642 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6643 dataref_offset
= build_int_cst (reference_alias_ptr_type
6644 (DR_REF (first_dr
)), 0);
6649 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6650 offset
, &dummy
, gsi
, &ptr_incr
,
6651 simd_lane_access_p
, &inv_p
,
6654 else if (dataref_offset
)
6655 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6656 TYPE_SIZE_UNIT (aggr_type
));
6658 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6659 TYPE_SIZE_UNIT (aggr_type
));
6661 if (grouped_load
|| slp_perm
)
6662 dr_chain
.create (vec_num
);
6668 vec_array
= create_vector_array (vectype
, vec_num
);
6671 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6672 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6673 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6674 gimple_call_set_lhs (new_stmt
, vec_array
);
6675 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6677 /* Extract each vector into an SSA_NAME. */
6678 for (i
= 0; i
< vec_num
; i
++)
6680 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6682 dr_chain
.quick_push (new_temp
);
6685 /* Record the mapping between SSA_NAMEs and statements. */
6686 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6690 for (i
= 0; i
< vec_num
; i
++)
6693 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6696 /* 2. Create the vector-load in the loop. */
6697 switch (alignment_support_scheme
)
6700 case dr_unaligned_supported
:
6702 unsigned int align
, misalign
;
6705 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
6708 : build_int_cst (reference_alias_ptr_type
6709 (DR_REF (first_dr
)), 0));
6710 align
= TYPE_ALIGN_UNIT (vectype
);
6711 if (alignment_support_scheme
== dr_aligned
)
6713 gcc_assert (aligned_access_p (first_dr
));
6716 else if (DR_MISALIGNMENT (first_dr
) == -1)
6718 TREE_TYPE (data_ref
)
6719 = build_aligned_type (TREE_TYPE (data_ref
),
6720 TYPE_ALIGN (elem_type
));
6721 align
= TYPE_ALIGN_UNIT (elem_type
);
6726 TREE_TYPE (data_ref
)
6727 = build_aligned_type (TREE_TYPE (data_ref
),
6728 TYPE_ALIGN (elem_type
));
6729 misalign
= DR_MISALIGNMENT (first_dr
);
6731 if (dataref_offset
== NULL_TREE
6732 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
6733 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6737 case dr_explicit_realign
:
6741 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
6743 if (compute_in_loop
)
6744 msq
= vect_setup_realignment (first_stmt
, gsi
,
6746 dr_explicit_realign
,
6749 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
6750 ptr
= copy_ssa_name (dataref_ptr
);
6752 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
6753 new_stmt
= gimple_build_assign
6754 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
6756 (TREE_TYPE (dataref_ptr
),
6757 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6758 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6760 = build2 (MEM_REF
, vectype
, ptr
,
6761 build_int_cst (reference_alias_ptr_type
6762 (DR_REF (first_dr
)), 0));
6763 vec_dest
= vect_create_destination_var (scalar_dest
,
6765 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6766 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6767 gimple_assign_set_lhs (new_stmt
, new_temp
);
6768 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6769 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6770 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6773 bump
= size_binop (MULT_EXPR
, vs
,
6774 TYPE_SIZE_UNIT (elem_type
));
6775 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
6776 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6777 new_stmt
= gimple_build_assign
6778 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
6781 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6782 ptr
= copy_ssa_name (ptr
, new_stmt
);
6783 gimple_assign_set_lhs (new_stmt
, ptr
);
6784 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6786 = build2 (MEM_REF
, vectype
, ptr
,
6787 build_int_cst (reference_alias_ptr_type
6788 (DR_REF (first_dr
)), 0));
6791 case dr_explicit_realign_optimized
:
6792 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
6793 new_temp
= copy_ssa_name (dataref_ptr
);
6795 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
6796 new_stmt
= gimple_build_assign
6797 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
6799 (TREE_TYPE (dataref_ptr
),
6800 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6801 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6803 = build2 (MEM_REF
, vectype
, new_temp
,
6804 build_int_cst (reference_alias_ptr_type
6805 (DR_REF (first_dr
)), 0));
6810 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6811 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6812 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6813 gimple_assign_set_lhs (new_stmt
, new_temp
);
6814 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6816 /* 3. Handle explicit realignment if necessary/supported.
6818 vec_dest = realign_load (msq, lsq, realignment_token) */
6819 if (alignment_support_scheme
== dr_explicit_realign_optimized
6820 || alignment_support_scheme
== dr_explicit_realign
)
6822 lsq
= gimple_assign_lhs (new_stmt
);
6823 if (!realignment_token
)
6824 realignment_token
= dataref_ptr
;
6825 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6826 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
6827 msq
, lsq
, realignment_token
);
6828 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6829 gimple_assign_set_lhs (new_stmt
, new_temp
);
6830 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6832 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6835 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6836 add_phi_arg (phi
, lsq
,
6837 loop_latch_edge (containing_loop
),
6843 /* 4. Handle invariant-load. */
6844 if (inv_p
&& !bb_vinfo
)
6846 gcc_assert (!grouped_load
);
6847 /* If we have versioned for aliasing or the loop doesn't
6848 have any data dependencies that would preclude this,
6849 then we are sure this is a loop invariant load and
6850 thus we can insert it on the preheader edge. */
6851 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6852 && !nested_in_vect_loop
6853 && hoist_defs_of_uses (stmt
, loop
))
6855 if (dump_enabled_p ())
6857 dump_printf_loc (MSG_NOTE
, vect_location
,
6858 "hoisting out of the vectorized "
6860 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6862 tree tem
= copy_ssa_name (scalar_dest
);
6863 gsi_insert_on_edge_immediate
6864 (loop_preheader_edge (loop
),
6865 gimple_build_assign (tem
,
6867 (gimple_assign_rhs1 (stmt
))));
6868 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6872 gimple_stmt_iterator gsi2
= *gsi
;
6874 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6877 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6878 set_vinfo_for_stmt (new_stmt
,
6879 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6885 tree perm_mask
= perm_mask_for_reverse (vectype
);
6886 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6887 perm_mask
, stmt
, gsi
);
6888 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6891 /* Collect vector loads and later create their permutation in
6892 vect_transform_grouped_load (). */
6893 if (grouped_load
|| slp_perm
)
6894 dr_chain
.quick_push (new_temp
);
6896 /* Store vector loads in the corresponding SLP_NODE. */
6897 if (slp
&& !slp_perm
)
6898 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6900 /* Bump the vector pointer to account for a gap or for excess
6901 elements loaded for a permuted SLP load. */
6902 if (group_gap_adj
!= 0)
6906 = wide_int_to_tree (sizetype
,
6907 wi::smul (TYPE_SIZE_UNIT (elem_type
),
6908 group_gap_adj
, &ovf
));
6909 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6914 if (slp
&& !slp_perm
)
6919 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6920 slp_node_instance
, false))
6922 dr_chain
.release ();
6931 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6932 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6937 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6939 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6940 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6943 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
6962 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6963 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6967 enum vect_def_type dt
;
6968 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
6970 if (!COMPARISON_CLASS_P (cond
))
6973 lhs
= TREE_OPERAND (cond
, 0);
6974 rhs
= TREE_OPERAND (cond
, 1);
6976 if (TREE_CODE (lhs
) == SSA_NAME
)
6978 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6979 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6980 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6983 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6984 && TREE_CODE (lhs
) != FIXED_CST
)
6987 if (TREE_CODE (rhs
) == SSA_NAME
)
6989 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6990 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6991 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6994 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6995 && TREE_CODE (rhs
) != FIXED_CST
)
6998 *comp_vectype
= vectype1
? vectype1
: vectype2
;
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
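/* Illustrative sketch (hypothetical GIMPLE names, not the exact statements
   this function builds): the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a VEC_COND_EXPR whose first operand is the vector
   comparison:

     vect_x_6 = VEC_COND_EXPR <vect_a_7 < vect_b_8, vect_c_9, vect_d_10>;  */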
7016 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
7017 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
7020 tree scalar_dest
= NULL_TREE
;
7021 tree vec_dest
= NULL_TREE
;
7022 tree cond_expr
, then_clause
, else_clause
;
7023 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7024 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7025 tree comp_vectype
= NULL_TREE
;
7026 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
7027 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
7028 tree vec_compare
, vec_cond_expr
;
7030 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7032 enum vect_def_type dt
, dts
[4];
7033 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7035 enum tree_code code
;
7036 stmt_vec_info prev_stmt_info
= NULL
;
7038 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7039 vec
<tree
> vec_oprnds0
= vNULL
;
7040 vec
<tree
> vec_oprnds1
= vNULL
;
7041 vec
<tree
> vec_oprnds2
= vNULL
;
7042 vec
<tree
> vec_oprnds3
= vNULL
;
7045 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
7048 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
7050 gcc_assert (ncopies
>= 1);
7051 if (reduc_index
&& ncopies
> 1)
7052 return false; /* FORNOW */
7054 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
7057 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7060 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7061 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
7065 /* FORNOW: not yet supported. */
7066 if (STMT_VINFO_LIVE_P (stmt_info
))
7068 if (dump_enabled_p ())
7069 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7070 "value used after loop.\n");
  /* Is this a vectorizable conditional operation?  */
7075 if (!is_gimple_assign (stmt
))
7078 code
= gimple_assign_rhs_code (stmt
);
7080 if (code
!= COND_EXPR
)
7083 cond_expr
= gimple_assign_rhs1 (stmt
);
7084 then_clause
= gimple_assign_rhs2 (stmt
);
7085 else_clause
= gimple_assign_rhs3 (stmt
);
7087 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
7092 if (TREE_CODE (then_clause
) == SSA_NAME
)
7094 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
7095 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7096 &then_def_stmt
, &def
, &dt
))
7099 else if (TREE_CODE (then_clause
) != INTEGER_CST
7100 && TREE_CODE (then_clause
) != REAL_CST
7101 && TREE_CODE (then_clause
) != FIXED_CST
)
7104 if (TREE_CODE (else_clause
) == SSA_NAME
)
7106 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
7107 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7108 &else_def_stmt
, &def
, &dt
))
7111 else if (TREE_CODE (else_clause
) != INTEGER_CST
7112 && TREE_CODE (else_clause
) != REAL_CST
7113 && TREE_CODE (else_clause
) != FIXED_CST
)
7116 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
  /* The result of a vector comparison should be of signed type.  */
7118 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
7119 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
7120 if (vec_cmp_type
== NULL_TREE
)
7125 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7126 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7133 vec_oprnds0
.create (1);
7134 vec_oprnds1
.create (1);
7135 vec_oprnds2
.create (1);
7136 vec_oprnds3
.create (1);
7140 scalar_dest
= gimple_assign_lhs (stmt
);
7141 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7143 /* Handle cond expr. */
7144 for (j
= 0; j
< ncopies
; j
++)
7146 gassign
*new_stmt
= NULL
;
7151 auto_vec
<tree
, 4> ops
;
7152 auto_vec
<vec
<tree
>, 4> vec_defs
;
7154 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7155 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7156 ops
.safe_push (then_clause
);
7157 ops
.safe_push (else_clause
);
7158 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7159 vec_oprnds3
= vec_defs
.pop ();
7160 vec_oprnds2
= vec_defs
.pop ();
7161 vec_oprnds1
= vec_defs
.pop ();
7162 vec_oprnds0
= vec_defs
.pop ();
7165 vec_defs
.release ();
7171 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7173 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
7174 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
7177 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7179 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
7180 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
7181 if (reduc_index
== 1)
7182 vec_then_clause
= reduc_def
;
7185 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7187 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
7188 NULL
, >emp
, &def
, &dts
[2]);
7190 if (reduc_index
== 2)
7191 vec_else_clause
= reduc_def
;
7194 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7196 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
7197 NULL
, >emp
, &def
, &dts
[3]);
7203 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
7204 vec_oprnds0
.pop ());
7205 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
7206 vec_oprnds1
.pop ());
7207 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7208 vec_oprnds2
.pop ());
7209 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7210 vec_oprnds3
.pop ());
7215 vec_oprnds0
.quick_push (vec_cond_lhs
);
7216 vec_oprnds1
.quick_push (vec_cond_rhs
);
7217 vec_oprnds2
.quick_push (vec_then_clause
);
7218 vec_oprnds3
.quick_push (vec_else_clause
);
7221 /* Arguments are ready. Create the new vector stmt. */
7222 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7224 vec_cond_rhs
= vec_oprnds1
[i
];
7225 vec_then_clause
= vec_oprnds2
[i
];
7226 vec_else_clause
= vec_oprnds3
[i
];
7228 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7229 vec_cond_lhs
, vec_cond_rhs
);
7230 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7231 vec_compare
, vec_then_clause
, vec_else_clause
);
7233 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7234 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7235 gimple_assign_set_lhs (new_stmt
, new_temp
);
7236 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7238 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7245 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7247 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7249 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7252 vec_oprnds0
.release ();
7253 vec_oprnds1
.release ();
7254 vec_oprnds2
.release ();
7255 vec_oprnds3
.release ();
7261 /* Make sure the statement is vectorizable. */
7264 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
7266 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7267 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7268 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7270 tree scalar_type
, vectype
;
7271 gimple pattern_stmt
;
7272 gimple_seq pattern_def_seq
;
7274 if (dump_enabled_p ())
7276 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7277 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7280 if (gimple_has_volatile_ops (stmt
))
7282 if (dump_enabled_p ())
7283 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7284 "not vectorized: stmt has volatile operands\n");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to be:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; in that case don't analyze the pattern stmt instead, since the
     pattern stmts are already part of the SLP instance.  */
7303 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7304 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7305 && !STMT_VINFO_LIVE_P (stmt_info
))
7307 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7309 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7310 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7312 /* Analyze PATTERN_STMT instead of the original stmt. */
7313 stmt
= pattern_stmt
;
7314 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7315 if (dump_enabled_p ())
7317 dump_printf_loc (MSG_NOTE
, vect_location
,
7318 "==> examining pattern statement: ");
7319 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7324 if (dump_enabled_p ())
7325 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7330 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7333 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7334 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7336 /* Analyze PATTERN_STMT too. */
7337 if (dump_enabled_p ())
7339 dump_printf_loc (MSG_NOTE
, vect_location
,
7340 "==> examining pattern statement: ");
7341 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7344 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7348 if (is_pattern_stmt_p (stmt_info
)
7350 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7352 gimple_stmt_iterator si
;
7354 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7356 gimple pattern_def_stmt
= gsi_stmt (si
);
7357 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7358 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7360 /* Analyze def stmt of STMT if it's a pattern stmt. */
7361 if (dump_enabled_p ())
7363 dump_printf_loc (MSG_NOTE
, vect_location
,
7364 "==> examining pattern def statement: ");
7365 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7368 if (!vect_analyze_stmt (pattern_def_stmt
,
7369 need_to_vectorize
, node
))
7375 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7377 case vect_internal_def
:
7380 case vect_reduction_def
:
7381 case vect_nested_cycle
:
7382 gcc_assert (!bb_vinfo
7383 && (relevance
== vect_used_in_outer
7384 || relevance
== vect_used_in_outer_by_reduction
7385 || relevance
== vect_used_by_reduction
7386 || relevance
== vect_unused_in_scope
));
7389 case vect_induction_def
:
7390 case vect_constant_def
:
7391 case vect_external_def
:
7392 case vect_unknown_def_type
:
7399 gcc_assert (PURE_SLP_STMT (stmt_info
));
7401 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7402 if (dump_enabled_p ())
7404 dump_printf_loc (MSG_NOTE
, vect_location
,
7405 "get vectype for scalar type: ");
7406 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7407 dump_printf (MSG_NOTE
, "\n");
7410 vectype
= get_vectype_for_scalar_type (scalar_type
);
7413 if (dump_enabled_p ())
7415 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7416 "not SLPed: unsupported data-type ");
7417 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7419 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7424 if (dump_enabled_p ())
7426 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7427 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7428 dump_printf (MSG_NOTE
, "\n");
7431 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7434 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7436 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7437 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7438 || (is_gimple_call (stmt
)
7439 && gimple_call_lhs (stmt
) == NULL_TREE
));
7440 *need_to_vectorize
= true;
7443 if (PURE_SLP_STMT (stmt_info
) && !node
)
7445 dump_printf_loc (MSG_NOTE
, vect_location
,
7446 "handled only by SLP analysis\n");
7452 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7453 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7454 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7455 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7456 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7457 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7458 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7459 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7460 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7461 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7462 || vectorizable_reduction (stmt
, NULL
, NULL
, node
)
7463 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7467 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7468 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7469 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7470 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7471 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7472 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7473 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7474 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7475 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7480 if (dump_enabled_p ())
7482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7483 "not vectorized: relevant stmt not ");
7484 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7485 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7494 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7495 need extra handling, except for vectorizable reductions. */
7496 if (STMT_VINFO_LIVE_P (stmt_info
)
7497 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7498 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7502 if (dump_enabled_p ())
7504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7505 "not vectorized: live stmt not ");
7506 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7507 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */
7522 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7523 bool *grouped_store
, slp_tree slp_node
,
7524 slp_instance slp_node_instance
)
7526 bool is_store
= false;
7527 gimple vec_stmt
= NULL
;
7528 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7531 gimple old_vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7533 switch (STMT_VINFO_TYPE (stmt_info
))
7535 case type_demotion_vec_info_type
:
7536 case type_promotion_vec_info_type
:
7537 case type_conversion_vec_info_type
:
7538 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7542 case induc_vec_info_type
:
7543 gcc_assert (!slp_node
);
7544 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7548 case shift_vec_info_type
:
7549 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7553 case op_vec_info_type
:
7554 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7558 case assignment_vec_info_type
:
7559 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7563 case load_vec_info_type
:
7564 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7569 case store_vec_info_type
:
7570 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7572 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
      /* In case of interleaving, the whole chain is vectorized when the
	 last store in the chain is reached.  Store stmts before the last
	 one are skipped, and their vec_stmt_info shouldn't be freed
	 meanwhile.  */
7578 *grouped_store
= true;
7579 if (STMT_VINFO_VEC_STMT (stmt_info
))
7586 case condition_vec_info_type
:
7587 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7591 case call_vec_info_type
:
7592 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7593 stmt
= gsi_stmt (*gsi
);
7594 if (is_gimple_call (stmt
)
7595 && gimple_call_internal_p (stmt
)
7596 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7600 case call_simd_clone_vec_info_type
:
7601 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7602 stmt
= gsi_stmt (*gsi
);
7605 case reduc_vec_info_type
:
7606 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7611 if (!STMT_VINFO_LIVE_P (stmt_info
))
7613 if (dump_enabled_p ())
7614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7615 "stmt not supported.\n");
7620 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7621 This would break hybrid SLP vectorization. */
7623 gcc_assert (!vec_stmt
7624 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt
);
7626 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7627 is being vectorized, but outside the immediately enclosing loop. */
7629 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7630 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7631 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7632 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7633 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7634 || STMT_VINFO_RELEVANT (stmt_info
) ==
7635 vect_used_in_outer_by_reduction
))
7637 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7638 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7639 imm_use_iterator imm_iter
;
7640 use_operand_p use_p
;
7644 if (dump_enabled_p ())
7645 dump_printf_loc (MSG_NOTE
, vect_location
,
7646 "Record the vdef for outer-loop vectorization.\n");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
7651 if (gimple_code (stmt
) == GIMPLE_PHI
)
7652 scalar_dest
= PHI_RESULT (stmt
);
7654 scalar_dest
= gimple_assign_lhs (stmt
);
7656 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7658 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7660 exit_phi
= USE_STMT (use_p
);
7661 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7666 /* Handle stmts whose DEF is used outside the loop-nest that is
7667 being vectorized. */
7668 if (STMT_VINFO_LIVE_P (stmt_info
)
7669 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7671 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7676 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

vect_remove_stores (gimple first_stmt)
7688 gimple next
= first_stmt
;
7690 gimple_stmt_iterator next_si
;
7694 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7696 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7697 if (is_pattern_stmt_p (stmt_info
))
7698 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7699 /* Free the attached stmt_vec_info and remove the stmt. */
7700 next_si
= gsi_for_stmt (next
);
7701 unlink_stmt_vdef (next
);
7702 gsi_remove (&next_si
, true);
7703 release_defs (next
);
7704 free_stmt_vec_info (next
);
7710 /* Function new_stmt_vec_info.
7712 Create and initialize a new stmt_vec_info struct for STMT. */
7715 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7716 bb_vec_info bb_vinfo
)
7719 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7721 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7722 STMT_VINFO_STMT (res
) = stmt
;
7723 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7724 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7725 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7726 STMT_VINFO_LIVE_P (res
) = false;
7727 STMT_VINFO_VECTYPE (res
) = NULL
;
7728 STMT_VINFO_VEC_STMT (res
) = NULL
;
7729 STMT_VINFO_VECTORIZABLE (res
) = true;
7730 STMT_VINFO_IN_PATTERN_P (res
) = false;
7731 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7732 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7733 STMT_VINFO_DATA_REF (res
) = NULL
;
7735 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7736 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7737 STMT_VINFO_DR_INIT (res
) = NULL
;
7738 STMT_VINFO_DR_STEP (res
) = NULL
;
7739 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7741 if (gimple_code (stmt
) == GIMPLE_PHI
7742 && is_loop_header_bb_p (gimple_bb (stmt
)))
7743 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7745 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7747 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7748 STMT_SLP_TYPE (res
) = loop_vect
;
7749 GROUP_FIRST_ELEMENT (res
) = NULL
;
7750 GROUP_NEXT_ELEMENT (res
) = NULL
;
7751 GROUP_SIZE (res
) = 0;
7752 GROUP_STORE_COUNT (res
) = 0;
7753 GROUP_GAP (res
) = 0;
7754 GROUP_SAME_DR_STMT (res
) = NULL
;
7760 /* Create a hash table for stmt_vec_info. */
7763 init_stmt_vec_info_vec (void)
7765 gcc_assert (!stmt_vec_info_vec
.exists ());
7766 stmt_vec_info_vec
.create (50);
7770 /* Free hash table for stmt_vec_info. */
7773 free_stmt_vec_info_vec (void)
7777 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7779 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7780 gcc_assert (stmt_vec_info_vec
.exists ());
7781 stmt_vec_info_vec
.release ();
7785 /* Free stmt vectorization related info. */
7788 free_stmt_vec_info (gimple stmt
)
7790 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
7799 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7801 stmt_vec_info patt_info
7802 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7805 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7806 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7807 gimple_set_bb (patt_stmt
, NULL
);
7808 tree lhs
= gimple_get_lhs (patt_stmt
);
7809 if (TREE_CODE (lhs
) == SSA_NAME
)
7810 release_ssa_name (lhs
);
7813 gimple_stmt_iterator si
;
7814 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7816 gimple seq_stmt
= gsi_stmt (si
);
7817 gimple_set_bb (seq_stmt
, NULL
);
7818 lhs
= gimple_get_lhs (patt_stmt
);
7819 if (TREE_CODE (lhs
) == SSA_NAME
)
7820 release_ssa_name (lhs
);
7821 free_stmt_vec_info (seq_stmt
);
7824 free_stmt_vec_info (patt_stmt
);
7828 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7829 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
7830 set_vinfo_for_stmt (stmt
, NULL
);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */
7841 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7843 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7844 machine_mode simd_mode
;
7845 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7852 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7853 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
7862 if (INTEGRAL_TYPE_P (scalar_type
)
7863 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7864 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7865 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7866 TYPE_UNSIGNED (scalar_type
));
7868 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7869 When the component mode passes the above test simply use a type
7870 corresponding to that mode. The theory is that any use that
7871 would cause problems with this will disable vectorization anyway. */
7872 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7873 && !INTEGRAL_TYPE_P (scalar_type
))
7874 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
7878 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7879 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7880 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
7884 if (scalar_type
== NULL_TREE
)
7887 /* If no size was supplied use the mode the target prefers. Otherwise
7888 lookup a vector mode of the specified size. */
7890 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7892 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7893 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7897 vectype
= build_vector_type (scalar_type
, nunits
);
7899 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7900 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7906 unsigned int current_vector_size
;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */
7914 get_vectype_for_scalar_type (tree scalar_type
)
7917 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7918 current_vector_size
);
7920 && current_vector_size
== 0)
7921 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */
7931 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7933 return get_vectype_for_scalar_type_and_size
7934 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
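/* Illustrative example (hypothetical GIMPLE names): in a loop containing

     s_1 = PHI <s_0, s_2>
     ...
     s_2 = s_1 + x_3;

   the operand x_3, defined by a statement of the current iteration, is a
   supportable use (vect_internal_def), while s_1, carried over from the
   previous iteration by the loop-header PHI, is the kind of operand the
   comment above describes as a reduction/induction definition and is
   handled specially by the callers.  */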
7954 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7955 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7956 tree
*def
, enum vect_def_type
*dt
)
7960 *dt
= vect_unknown_def_type
;
7962 if (dump_enabled_p ())
7964 dump_printf_loc (MSG_NOTE
, vect_location
,
7965 "vect_is_simple_use: operand ");
7966 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7967 dump_printf (MSG_NOTE
, "\n");
7970 if (CONSTANT_CLASS_P (operand
))
7972 *dt
= vect_constant_def
;
7976 if (is_gimple_min_invariant (operand
))
7979 *dt
= vect_external_def
;
7983 if (TREE_CODE (operand
) != SSA_NAME
)
7985 if (dump_enabled_p ())
7986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7991 if (SSA_NAME_IS_DEFAULT_DEF (operand
))
7994 *dt
= vect_external_def
;
7998 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7999 if (dump_enabled_p ())
8001 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
8002 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
8005 basic_block bb
= gimple_bb (*def_stmt
);
8006 if ((loop_vinfo
&& !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
), bb
))
8008 && (bb
!= BB_VINFO_BB (bb_vinfo
)
8009 || gimple_code (*def_stmt
) == GIMPLE_PHI
)))
8010 *dt
= vect_external_def
;
8013 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
8014 if (bb_vinfo
&& !STMT_VINFO_VECTORIZABLE (stmt_vinfo
))
8015 *dt
= vect_external_def
;
8017 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
8020 if (dump_enabled_p ())
8022 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
8025 case vect_uninitialized_def
:
8026 dump_printf (MSG_NOTE
, "uninitialized\n");
8028 case vect_constant_def
:
8029 dump_printf (MSG_NOTE
, "constant\n");
8031 case vect_external_def
:
8032 dump_printf (MSG_NOTE
, "external\n");
8034 case vect_internal_def
:
8035 dump_printf (MSG_NOTE
, "internal\n");
8037 case vect_induction_def
:
8038 dump_printf (MSG_NOTE
, "induction\n");
8040 case vect_reduction_def
:
8041 dump_printf (MSG_NOTE
, "reduction\n");
8043 case vect_double_reduction_def
:
8044 dump_printf (MSG_NOTE
, "double reduction\n");
8046 case vect_nested_cycle
:
8047 dump_printf (MSG_NOTE
, "nested cycle\n");
8049 case vect_unknown_def_type
:
8050 dump_printf (MSG_NOTE
, "unknown\n");
8055 if (*dt
== vect_unknown_def_type
8057 && *dt
== vect_double_reduction_def
8058 && gimple_code (stmt
) != GIMPLE_PHI
))
8060 if (dump_enabled_p ())
8061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8062 "Unsupported pattern.\n");
8066 switch (gimple_code (*def_stmt
))
8069 *def
= gimple_phi_result (*def_stmt
);
8073 *def
= gimple_assign_lhs (*def_stmt
);
8077 *def
= gimple_call_lhs (*def_stmt
);
8082 if (dump_enabled_p ())
8083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8084 "unsupported defining stmt:\n");
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */
8101 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
8102 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
8103 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
8105 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
8109 /* Now get a vector type if the def is internal, otherwise supply
8110 NULL_TREE and leave it up to the caller to figure out a proper
8111 type for the use stmt. */
8112 if (*dt
== vect_internal_def
8113 || *dt
== vect_induction_def
8114 || *dt
== vect_reduction_def
8115 || *dt
== vect_double_reduction_def
8116 || *dt
== vect_nested_cycle
)
8118 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8120 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8121 && !STMT_VINFO_RELEVANT (stmt_info
)
8122 && !STMT_VINFO_LIVE_P (stmt_info
))
8123 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8125 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8126 gcc_assert (*vectype
!= NULL_TREE
);
8128 else if (*dt
== vect_uninitialized_def
8129 || *dt
== vect_constant_def
8130 || *dt
== vect_external_def
)
8131 *vectype
= NULL_TREE
;
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
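/* Illustrative sketch (hypothetical vector modes): widening a V16QI char
   vector to int with VF == 16 needs two steps, so MULTI_STEP_CVT is 1 and
   INTERM_TYPES holds the short vector type:

     step 1:  V16QI --(VEC_UNPACK_LO/HI)--> 2 x V8HI
     step 2:  V8HI  --(VEC_UNPACK_LO/HI)--> 2 x V4SI   (4 result vectors)  */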
8161 supportable_widening_operation (enum tree_code code
, gimple stmt
,
8162 tree vectype_out
, tree vectype_in
,
8163 enum tree_code
*code1
, enum tree_code
*code2
,
8164 int *multi_step_cvt
,
8165 vec
<tree
> *interm_types
)
8167 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8168 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8169 struct loop
*vect_loop
= NULL
;
8170 machine_mode vec_mode
;
8171 enum insn_code icode1
, icode2
;
8172 optab optab1
, optab2
;
8173 tree vectype
= vectype_in
;
8174 tree wide_vectype
= vectype_out
;
8175 enum tree_code c1
, c2
;
8177 tree prev_type
, intermediate_type
;
8178 machine_mode intermediate_mode
, prev_mode
;
8179 optab optab3
, optab4
;
8181 *multi_step_cvt
= 0;
8183 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
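      /* Illustrative sketch (hypothetical source): the relaxed even/odd
	 ordering is acceptable for

	   s += (int) a[i] * (int) b[i];    (sum reduction, order-insensitive)

	 but not for

	   c[i] = (int) a[i] * (int) b[i];  (results stored back in order),

	 which is why the even/odd variant is only tried when the result
	 feeds a reduction.  */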
8222 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8223 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8224 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8225 stmt
, vectype_out
, vectype_in
,
8226 code1
, code2
, multi_step_cvt
,
      /* Elements in a vector with vect_used_by_reduction property cannot
	 be reordered if the use chain with this property does not have the
	 same operation.  One such example is s += a * b, where elements
	 in a and b cannot be reordered.  Here we check if the vector defined
	 by STMT is only directly used in the reduction statement.  */
8234 tree lhs
= gimple_assign_lhs (stmt
);
8235 use_operand_p dummy
;
8237 stmt_vec_info use_stmt_info
= NULL
;
8238 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8239 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8240 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8243 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8244 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8247 case VEC_WIDEN_MULT_EVEN_EXPR
:
8248 /* Support the recursion induced just above. */
8249 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8250 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8253 case WIDEN_LSHIFT_EXPR
:
8254 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8255 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8259 c1
= VEC_UNPACK_LO_EXPR
;
8260 c2
= VEC_UNPACK_HI_EXPR
;
8264 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8265 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8268 case FIX_TRUNC_EXPR
:
8269 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8270 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8271 computing the operation. */
8278 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8281 if (code
== FIX_TRUNC_EXPR
)
8283 /* The signedness is determined from output operand. */
8284 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8285 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8289 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8290 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8293 if (!optab1
|| !optab2
)
8296 vec_mode
= TYPE_MODE (vectype
);
8297 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8298 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8304 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8305 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
8311 prev_type
= vectype
;
8312 prev_mode
= vec_mode
;
8314 if (!CONVERT_EXPR_CODE_P (code
))
8317 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8318 intermediate steps in promotion sequence. We try
8319 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
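  /* Worked example (illustrative, assuming 128-bit vectors): widening
     vector(16) char to vector(4) int cannot be done with one unpack, so the
     loop below records vector(8) short as an intermediate type; the LO/HI
     unpacks then go V16QI -> V8HI and V8HI -> V4SI, giving
     *MULTI_STEP_CVT == 1 and INTERM_TYPES == { vector(8) short }.  */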
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
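/* For instance (an illustrative sketch, assuming 128-bit vectors): narrowing
   vector(4) int to vector(16) char takes two VEC_PACK_TRUNC_EXPR steps,
   V4SI -> V8HI and V8HI -> V16QI, so the function returns with
   *CODE1 == VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT == 1 and
   INTERM_TYPES == { vector(8) short }.  */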
bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;
    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }
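  /* Purely illustrative sketch: when narrowing, say, a vector of double to an
     unsigned integer vector, the signed VEC_PACK_FIX_TRUNC_EXPR variant may
     be cheaper; if it yields the same result mode as the unsigned one, we
     switch to it here and perform the remaining narrowing with signed
     VEC_PACK_TRUNC_EXPR steps in the loop below.  */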
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try up to
     MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();