1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
28 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-ssa-alias.h"
33 #include "internal-fn.h"
35 #include "gimple-expr.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "gimple-ssa.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-manip.h"
49 #include "tree-ssa-loop.h"
50 #include "tree-scalar-evolution.h"
52 #include "recog.h" /* FIXME: for insn_data */
54 #include "diagnostic-core.h"
55 #include "tree-vectorizer.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
/* NOTE(review): this chunk is extraction-mangled -- the return-type line and
   the function braces were dropped (original line numbers jump 62->65->67).
   Presumably the return type is `tree`; verify against the original file.  */
65 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
/* Simply forwards to the STMT_VINFO_VECTYPE accessor on the stmt-info.  */
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
/* NOTE(review): mangled extraction -- the return type (presumably `bool`),
   braces, the `struct loop *loop` declaration and the original lines 78-82
   (likely a NULL check on loop_vinfo) are missing; confirm upstream.  */
73 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
/* Fetch the gimple stmt and the basic block it lives in.  */
75 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* The stmt is "inner" iff its block's loop_father is the inner loop of
   the loop being vectorized.  */
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
/* NOTE(review): mangled extraction -- the return type, braces, and the
   control flow that selects between the "save into body_cost_vec" path and
   the "call add_stmt_cost directly" path were dropped (original line numbers
   jump 104->109, 114->116).  Two visible paths: (a) append via
   add_stmt_info_to_vec and return builtin_vectorization_cost * count;
   (b) pick target_cost_data from the loop or bb vinfo and call
   add_stmt_cost.  Confirm the missing conditionals upstream.  */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 int misalign
, enum vect_cost_model_location where
)
/* stmt_info may be NULL; guard the vectype lookup accordingly.  */
99 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
100 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
101 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
104 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
109 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
110 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
111 void *target_cost_data
;
/* Choose the target cost-model data from whichever vinfo is active.  */
114 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
116 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
118 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
123 /* Return a variable of type ELEM_TYPE[NELEMS]. */
/* NOTE(review): return type, braces, and the trailing argument of
   create_tmp_var (presumably a name string such as "vect_array") were lost
   in extraction; verify upstream.  */
126 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
128 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
132 /* ARRAY is an array of vectors created by create_vector_array.
133 Return an SSA_NAME for the vector in index N. The reference
134 is part of the vectorization of STMT and the vector is associated
135 with scalar destination SCALAR_DEST. */
/* NOTE(review): mangled extraction -- return type, braces, the `gimple
   new_stmt` declaration and the final `return vect_name;` appear to have
   been dropped; confirm upstream.  */
138 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
139 tree array
, unsigned HOST_WIDE_INT n
)
141 tree vect_type
, vect
, vect_name
, array_ref
;
/* ARRAY must really be an array object, per create_vector_array.  */
144 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
145 vect_type
= TREE_TYPE (TREE_TYPE (array
));
146 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
/* Build ARRAY[N] and load it into a fresh SSA name.  */
147 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
148 build_int_cst (size_type_node
, n
),
149 NULL_TREE
, NULL_TREE
);
151 new_stmt
= gimple_build_assign (vect
, array_ref
);
152 vect_name
= make_ssa_name (vect
, new_stmt
);
153 gimple_assign_set_lhs (new_stmt
, vect_name
);
/* Emit the load at the location associated with STMT/GSI.  */
154 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
159 /* ARRAY is an array of vectors created by create_vector_array.
160 Emit code to store SSA_NAME VECT in index N of the array.
161 The store is part of the vectorization of STMT. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces
   and the local declarations of `array_ref` and `new_stmt` are missing;
   confirm upstream.  */
164 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
165 tree array
, unsigned HOST_WIDE_INT n
)
/* Build ARRAY[N] as the store destination and emit ARRAY[N] = VECT.  */
170 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
171 build_int_cst (size_type_node
, n
),
172 NULL_TREE
, NULL_TREE
);
174 new_stmt
= gimple_build_assign (array_ref
, vect
);
175 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
178 /* PTR is a pointer to an array of type TYPE. Return a representation
179 of *PTR. The memory reference replaces those in FIRST_DR
/* NOTE(review): the tail of this comment, the return type, braces and the
   final `return mem_ref;` were dropped in extraction; verify upstream.  */
183 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
185 tree mem_ref
, alias_ptr_type
;
/* Inherit the alias-set pointer type from the original data reference so
   the new MEM_REF keeps FIRST_DR's aliasing properties.  */
187 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
188 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
189 /* Arrays have the same alignment as their type. */
190 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): heavily mangled extraction -- return type (presumably void),
   braces, several declarations (e.g. `gimple pattern_stmt`, `tree lhs`,
   `use_operand_p use_p`, `gimple use_stmt`) and various early-exit/`return`
   lines are missing (original line numbers jump throughout); confirm all
   control flow against the upstream file before editing.  */
201 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
202 enum vect_relevant relevant
, bool live_p
,
203 bool used_in_pattern
)
/* Snapshot the current relevance/liveness so we can detect "no change"
   below and avoid re-pushing the stmt onto the worklist.  */
205 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
210 if (dump_enabled_p ())
211 dump_printf_loc (MSG_NOTE
, vect_location
,
212 "mark relevant %d, live %d.\n", relevant
, live_p
);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
221 if (!used_in_pattern
)
223 imm_use_iterator imm_iter
;
227 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
228 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* The LHS may come from either an assignment or a call.  */
230 if (is_gimple_assign (stmt
))
231 lhs
= gimple_assign_lhs (stmt
);
233 lhs
= gimple_call_lhs (stmt
);
235 /* This use is out of pattern use, if LHS has other uses that are
236 pattern uses, we should mark the stmt itself, and not the pattern
238 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
/* Walk all immediate uses of LHS, skipping debug stmts and uses
   outside the loop, looking for uses that belong to a pattern.  */
239 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
241 if (is_gimple_debug (USE_STMT (use_p
)))
243 use_stmt
= USE_STMT (use_p
);
245 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
248 if (vinfo_for_stmt (use_stmt
)
249 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
259 /* This is the last stmt in a sequence that was detected as a
260 pattern that can potentially be vectorized. Don't mark the stmt
261 as relevant/live because it's not going to be vectorized.
262 Instead mark the pattern-stmt that replaces it. */
264 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_NOTE
, vect_location
,
268 "last stmt in pattern. don't mark"
269 " relevant/live.\n");
/* Redirect all further processing to the pattern stmt's info.  */
270 stmt_info
= vinfo_for_stmt (pattern_stmt
);
271 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
272 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
273 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
/* Merge in the new liveness and take the max relevance.  */
278 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
279 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
280 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
/* If nothing changed, the stmt was already marked -- don't push it
   onto the worklist again.  */
282 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
283 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
285 if (dump_enabled_p ())
286 dump_printf_loc (MSG_NOTE
, vect_location
,
287 "already marked relevant/live.\n");
291 worklist
->safe_push (stmt
);
295 /* Function vect_stmt_relevant_p.
297 Return true if STMT in loop that is represented by LOOP_VINFO is
298 "relevant for vectorization".
300 A stmt is considered "relevant for vectorization" if:
301 - it has uses outside the loop.
302 - it has vdefs (it alters memory).
303 - control stmts in the loop (except for the exit condition).
305 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): mangled extraction -- return type (presumably bool), braces,
   declarations of `def_p`/`op_iter`/`use_p`, the `*live_p = false;` init and
   the code that sets *live_p for out-of-loop uses are missing; confirm
   upstream.  */
308 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
309 enum vect_relevant
*relevant
, bool *live_p
)
311 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
313 imm_use_iterator imm_iter
;
/* Start from "unused"; upgraded below as evidence is found.  */
317 *relevant
= vect_unused_in_scope
;
320 /* cond stmt other than loop exit cond. */
321 if (is_ctrl_stmt (stmt
)
322 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
323 != loop_exit_ctrl_vec_info_type
)
324 *relevant
= vect_used_in_scope
;
326 /* changing memory. */
327 if (gimple_code (stmt
) != GIMPLE_PHI
)
328 if (gimple_vdef (stmt
))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE
, vect_location
,
332 "vec_stmt_relevant_p: stmt has vdefs.\n");
333 *relevant
= vect_used_in_scope
;
336 /* uses outside the loop. */
337 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
339 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
341 basic_block bb
= gimple_bb (USE_STMT (use_p
));
342 if (!flow_bb_inside_loop_p (loop
, bb
))
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE
, vect_location
,
346 "vec_stmt_relevant_p: used out of loop.\n");
348 if (is_gimple_debug (USE_STMT (use_p
)))
351 /* We expect all such uses to be in the loop exit phis
352 (because of loop closed form) */
353 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
354 gcc_assert (bb
== single_exit (loop
)->dest
);
/* Relevant if either live outside the loop or used in scope.  */
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT. Check if USE is
368 used in STMT for anything other than indexing an array. */
/* NOTE(review): mangled extraction -- return type (presumably bool), braces,
   the `tree operand` declaration, the various `return true/false` lines and
   the final comparison of OPERAND against USE are missing; confirm the exact
   true/false paths upstream.  */
371 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
374 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
376 /* USE corresponds to some operand in STMT. If there is no data
377 reference in STMT, then any operand that corresponds to USE
378 is not indexing an array. */
379 if (!STMT_VINFO_DATA_REF (stmt_info
))
382 /* STMT has a data_ref. FORNOW this means that its of one of
386 (This should have been verified in analyze_data_refs).
388 'var' in the second case corresponds to a def, not a use,
389 so USE cannot correspond to any operands that are not used
392 Therefore, all we need to check is if STMT falls into the
393 first case, and whether var corresponds to USE. */
395 if (!gimple_assign_copy_p (stmt
))
/* Internal masked load/store calls keep their "value" operand at a
   fixed argument position (3 for stores, 2 for loads here).  */
397 if (is_gimple_call (stmt
)
398 && gimple_call_internal_p (stmt
))
399 switch (gimple_call_internal_fn (stmt
))
402 operand
= gimple_call_arg (stmt
, 3);
407 operand
= gimple_call_arg (stmt
, 2);
417 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
419 operand
= gimple_assign_rhs1 (stmt
);
420 if (TREE_CODE (operand
) != SSA_NAME
)
431 Function process_use.
434 - a USE in STMT in a loop represented by LOOP_VINFO
435 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
436 that defined USE. This is done by calling mark_relevant and passing it
437 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
438 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
442 Generally, LIVE_P and RELEVANT are used to define the liveness and
443 relevance info of the DEF_STMT of this USE:
444 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
445 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 - case 1: If USE is used only for address computations (e.g. array indexing),
448 which does not need to be directly vectorized, then the liveness/relevance
449 of the respective DEF_STMT is left unchanged.
450 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
451 skip DEF_STMT cause it had already been processed.
452 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
453 be modified accordingly.
455 Return true if everything is as expected. Return false otherwise. */
/* NOTE(review): mangled extraction -- the comment opener for the block above,
   the return type (presumably bool), braces, the `bool force` parameter line,
   declarations of `def_stmt`/`def`, the switch headers/`break;`s for the
   relevance switches, and the `return true/false` statements were all
   dropped; verify every switch arm against the upstream file.  */
458 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
459 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
462 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
463 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
464 stmt_vec_info dstmt_vinfo
;
465 basic_block bb
, def_bb
;
468 enum vect_def_type dt
;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
/* Classify the use; failure means the loop cannot be vectorized.  */
475 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
479 "not vectorized: unsupported use in stmt.\n");
/* No defining stmt (e.g. default def) -- nothing to propagate to.  */
483 if (!def_stmt
|| gimple_nop_p (def_stmt
))
486 def_bb
= gimple_bb (def_stmt
);
487 if (!flow_bb_inside_loop_p (loop
, def_bb
))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
500 bb
= gimple_bb (stmt
);
501 if (gimple_code (stmt
) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
503 && gimple_code (def_stmt
) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
505 && bb
->loop_father
== def_bb
->loop_father
)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE
, vect_location
,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
/* If the def stmt was replaced by a pattern stmt, check the pattern
   stmt's info instead.  */
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
511 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
/* Map the relevance of the inner-loop use onto the outer-loop def.  */
525 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE
, vect_location
,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
533 case vect_unused_in_scope
:
534 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
535 vect_used_in_scope
: vect_unused_in_scope
;
538 case vect_used_in_outer_by_reduction
:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
540 relevant
= vect_used_by_reduction
;
543 case vect_used_in_outer
:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
545 relevant
= vect_used_in_scope
;
548 case vect_used_in_scope
:
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
/* Symmetric mapping: outer-loop use of an inner-loop def.  */
563 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE
, vect_location
,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
571 case vect_unused_in_scope
:
572 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
574 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
577 case vect_used_by_reduction
:
578 relevant
= vect_used_in_outer_by_reduction
;
581 case vect_used_in_scope
:
582 relevant
= vect_used_in_outer
;
/* Finally propagate liveness/relevance to the def stmt and enqueue it.  */
590 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
591 is_pattern_stmt_p (stmt_vinfo
));
596 /* Function vect_mark_stmts_to_be_vectorized.
598 Not all stmts in the loop need to be vectorized. For example:
607 Stmt 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
610 This pass detects such stmts. */
/* NOTE(review): heavily mangled extraction -- the return type (presumably
   bool), braces, several declarations (`basic_block bb`, `gimple phi`,
   `gimple stmt`, `bool live_p`, iterator/use_p declarations, `unsigned i`),
   the switch statement header for DEF_TYPE, `break;`/`return false;`/
   `return true;` lines and some loop bodies were dropped; do not modify
   control flow without consulting the upstream file.  */
613 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
615 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
616 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
617 unsigned int nbbs
= loop
->num_nodes
;
618 gimple_stmt_iterator si
;
621 stmt_vec_info stmt_vinfo
;
625 enum vect_relevant relevant
, tmp_relevant
;
626 enum vect_def_type def_type
;
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE
, vect_location
,
630 "=== vect_mark_stmts_to_be_vectorized ===\n");
632 auto_vec
<gimple
, 64> worklist
;
634 /* 1. Init worklist. */
/* Seed the worklist with every phi and stmt that is relevant on its own
   (vdefs, out-of-loop uses, non-exit control stmts).  */
635 for (i
= 0; i
< nbbs
; i
++)
638 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
644 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
645 dump_printf (MSG_NOTE
, "\n");
648 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
649 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
651 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
653 stmt
= gsi_stmt (si
);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
657 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
658 dump_printf (MSG_NOTE
, "\n");
661 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
662 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
666 /* 2. Process_worklist */
/* Fixed-point propagation: pop a stmt, propagate its liveness/relevance
   to the defs of its uses, which may in turn be pushed.  */
667 while (worklist
.length () > 0)
672 stmt
= worklist
.pop ();
673 if (dump_enabled_p ())
675 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
676 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
677 dump_printf (MSG_NOTE
, "\n");
680 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
681 (DEF_STMT) as relevant/irrelevant and live/dead according to the
682 liveness and relevance properties of STMT. */
683 stmt_vinfo
= vinfo_for_stmt (stmt
);
684 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
685 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
687 /* Generally, the liveness and relevance properties of STMT are
688 propagated as is to the DEF_STMTs of its USEs:
689 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
690 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
692 One exception is when STMT has been identified as defining a reduction
693 variable; in this case we set the liveness/relevance as follows:
695 relevant = vect_used_by_reduction
696 This is because we distinguish between two kinds of relevant stmts -
697 those that are used by a reduction computation, and those that are
698 (also) used by a regular computation. This allows us later on to
699 identify stmts that are used solely by a reduction, and therefore the
700 order of the results that they produce does not have to be kept. */
702 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
703 tmp_relevant
= relevant
;
706 case vect_reduction_def
:
707 switch (tmp_relevant
)
709 case vect_unused_in_scope
:
710 relevant
= vect_used_by_reduction
;
713 case vect_used_by_reduction
:
714 if (gimple_code (stmt
) == GIMPLE_PHI
)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
721 "unsupported use of reduction.\n");
728 case vect_nested_cycle
:
729 if (tmp_relevant
!= vect_unused_in_scope
730 && tmp_relevant
!= vect_used_in_outer_by_reduction
731 && tmp_relevant
!= vect_used_in_outer
)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
735 "unsupported use of nested cycle.\n");
743 case vect_double_reduction_def
:
744 if (tmp_relevant
!= vect_unused_in_scope
745 && tmp_relevant
!= vect_used_by_reduction
)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
749 "unsupported use of double reduction.\n");
761 if (is_pattern_stmt_p (stmt_vinfo
))
763 /* Pattern statements are not inserted into the code, so
764 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
765 have to scan the RHS or function arguments instead. */
766 if (is_gimple_assign (stmt
))
768 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
769 tree op
= gimple_assign_rhs1 (stmt
);
/* COND_EXPR comparisons need both comparison operands processed
   explicitly since they are nested inside OP.  */
772 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
774 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
775 live_p
, relevant
, &worklist
, false)
776 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
777 live_p
, relevant
, &worklist
, false))
781 for (; i
< gimple_num_ops (stmt
); i
++)
783 op
= gimple_op (stmt
, i
);
784 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
789 else if (is_gimple_call (stmt
))
791 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
793 tree arg
= gimple_call_arg (stmt
, i
);
794 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
/* Non-pattern stmts: walk the regular SSA use operands.  */
801 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
803 tree op
= USE_FROM_PTR (use_p
);
804 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
/* Gather loads also use an offset SSA name that must be processed
   (force = true, since it is not a "non-indexing" operand).  */
809 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
812 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
814 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
818 } /* while worklist */
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
/* NOTE(review): mangled extraction -- the return type (presumably void),
   braces, the `int i;` declaration and the early `return` for the
   PURE_SLP_STMT case are missing; confirm upstream.  */
831 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
832 enum vect_def_type
*dt
,
833 stmt_vector_for_cost
*prologue_cost_vec
,
834 stmt_vector_for_cost
*body_cost_vec
)
837 int inside_cost
= 0, prologue_cost
= 0;
839 /* The SLP costs were already calculated during SLP tree build. */
840 if (PURE_SLP_STMT (stmt_info
))
843 /* FORNOW: Assuming maximum 2 args per stmts. */
/* Constant/external operands require a one-time prologue broadcast.  */
844 for (i
= 0; i
< 2; i
++)
845 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
846 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
847 stmt_info
, 0, vect_prologue
);
849 /* Pass the inside-of-loop statements to the target-specific cost model. */
850 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
851 stmt_info
, 0, vect_body
);
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_NOTE
, vect_location
,
855 "vect_model_simple_cost: inside_cost = %d, "
856 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
860 /* Model cost for type demotion and promotion operations. PWR is normally
861 zero for single-step promotions and demotions. It will be one if
862 two-step promotion/demotion is required, and so on. Each additional
863 step doubles the number of instructions required. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces,
   loop-variable declarations, the full ternary at original line 886/887 that
   computes `tmp` from i and pwr, and the conditional selecting the loop vs
   bb target_cost_data are partially missing; confirm upstream.  */
866 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
867 enum vect_def_type
*dt
, int pwr
)
870 int inside_cost
= 0, prologue_cost
= 0;
871 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
872 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
873 void *target_cost_data
;
875 /* The SLP costs were already calculated during SLP tree build. */
876 if (PURE_SLP_STMT (stmt_info
))
880 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
882 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
/* One promote/demote step per power-of-two level; vect_pow2 scales the
   per-level statement count.  */
884 for (i
= 0; i
< pwr
+ 1; i
++)
886 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
888 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
889 vec_promote_demote
, stmt_info
, 0,
893 /* FORNOW: Assuming maximum 2 args per stmts. */
894 for (i
= 0; i
< 2; i
++)
895 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
896 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
897 stmt_info
, 0, vect_prologue
);
899 if (dump_enabled_p ())
900 dump_printf_loc (MSG_NOTE
, vect_location
,
901 "vect_model_promotion_demotion_cost: inside_cost = %d, "
902 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
905 /* Function vect_cost_group_size
907 For grouped load or store, return the group_size only if it is the first
908 load or store of a group, else return 1. This ensures that group size is
909 only returned once per group. */
/* NOTE(review): mangled extraction -- return type (presumably int), braces
   and the fallback `return 1;` are missing; confirm upstream.  */
912 vect_cost_group_size (stmt_vec_info stmt_info
)
914 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
916 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
917 return GROUP_SIZE (stmt_info
);
923 /* Function vect_model_store_cost
925 Models cost for stores. In the case of grouped accesses, one access
926 has the overhead of the grouped access attributed to it. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces,
   the `slp_tree slp_node` parameter line, declarations of `first_stmt` and
   `group_size`, the slp_node conditional around the first_stmt choice, the
   early return for PURE_SLP_STMT, and the `group_size = 1` else-branch are
   missing; confirm upstream.  */
929 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
930 bool store_lanes_p
, enum vect_def_type dt
,
932 stmt_vector_for_cost
*prologue_cost_vec
,
933 stmt_vector_for_cost
*body_cost_vec
)
936 unsigned int inside_cost
= 0, prologue_cost
= 0;
937 struct data_reference
*first_dr
;
940 /* The SLP costs were already calculated during SLP tree build. */
941 if (PURE_SLP_STMT (stmt_info
))
/* Constant/external stored value costs one scalar_to_vec in prologue.  */
944 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
945 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
946 stmt_info
, 0, vect_prologue
);
948 /* Grouped access? */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
953 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
958 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
959 group_size
= vect_cost_group_size (stmt_info
);
962 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
964 /* Not a grouped access. */
968 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
971 /* We assume that the cost of a single store-lanes instruction is
972 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
973 access is instead being provided by a permute-and-store operation,
974 include the cost of the permutes. */
975 if (!store_lanes_p
&& group_size
> 1)
977 /* Uses a high and low interleave operation for each needed permute. */
979 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
980 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
981 stmt_info
, 0, vect_body
);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE
, vect_location
,
985 "vect_model_store_cost: strided group_size = %d .\n",
989 /* Costs of the stores. */
990 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE
, vect_location
,
994 "vect_model_store_cost: inside_cost = %d, "
995 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
999 /* Calculate cost of DR's memory access. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces,
   the `case dr_aligned:` label, the `break;`s, the vect_body argument of the
   aligned record_stmt_cost call, and the dump text continuation for the
   unaligned-supported case are missing; confirm upstream.  */
1001 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1002 unsigned int *inside_cost
,
1003 stmt_vector_for_cost
*body_cost_vec
)
1005 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1006 gimple stmt
= DR_STMT (dr
);
1007 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Cost depends on how the target supports this DR's alignment.  */
1009 switch (alignment_support_scheme
)
1013 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1014 vector_store
, stmt_info
, 0,
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: aligned.\n");
1023 case dr_unaligned_supported
:
1025 /* Here, we assign an additional cost for the unaligned store. */
1026 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1027 unaligned_store
, stmt_info
,
1028 DR_MISALIGNMENT (dr
), vect_body
);
1029 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_NOTE
, vect_location
,
1031 "vect_model_store_cost: unaligned supported by "
1036 case dr_unaligned_unsupported
:
/* Unsupported: pin the cost at the maximum so this choice loses.  */
1038 *inside_cost
= VECT_MAX_COST
;
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1042 "vect_model_store_cost: unsupported access.\n");
1052 /* Function vect_model_load_cost
1054 Models cost for loads. In the case of grouped accesses, the last access
1055 has the overhead of the grouped access attributed to it. Since unaligned
1056 accesses are supported for loads, we also account for the costs of the
1057 access scheme chosen. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces,
   declarations of `first_stmt`/`group_size`, the early return for
   PURE_SLP_STMT, the "not grouped" else-branch (group_size = 1,
   first_dr = dr) and the strided group_size dump argument are missing;
   confirm upstream.  */
1060 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1061 bool load_lanes_p
, slp_tree slp_node
,
1062 stmt_vector_for_cost
*prologue_cost_vec
,
1063 stmt_vector_for_cost
*body_cost_vec
)
1067 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1068 unsigned int inside_cost
= 0, prologue_cost
= 0;
1070 /* The SLP costs were already calculated during SLP tree build. */
1071 if (PURE_SLP_STMT (stmt_info
))
1074 /* Grouped accesses? */
1075 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1076 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1078 group_size
= vect_cost_group_size (stmt_info
);
1079 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1081 /* Not a grouped access. */
1088 /* We assume that the cost of a single load-lanes instruction is
1089 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1090 access is instead being provided by a load-and-permute operation,
1091 include the cost of the permutes. */
1092 if (!load_lanes_p
&& group_size
> 1)
1094 /* Uses an even and odd extract operations for each needed permute. */
1095 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1096 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1097 stmt_info
, 0, vect_body
);
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE
, vect_location
,
1101 "vect_model_load_cost: strided group_size = %d .\n",
1105 /* The loads themselves. */
/* Strided loads: one scalar load per vector element, plus the cost of
   assembling them into a vector (vec_construct).  */
1106 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1108 /* N scalar loads plus gathering them into a vector. */
1109 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1110 inside_cost
+= record_stmt_cost (body_cost_vec
,
1111 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1112 scalar_load
, stmt_info
, 0, vect_body
);
1113 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1114 stmt_info
, 0, vect_body
);
1117 vect_get_load_cost (first_dr
, ncopies
,
1118 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1119 || group_size
> 1 || slp_node
),
1120 &inside_cost
, &prologue_cost
,
1121 prologue_cost_vec
, body_cost_vec
, true);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE
, vect_location
,
1125 "vect_model_load_cost: inside_cost = %d, "
1126 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1130 /* Calculate cost of DR's memory access. */
/* NOTE(review): mangled extraction -- return type (presumably void), braces,
   the `case dr_aligned:` label, the `break;`s after each case, the
   vect_prologue arguments of the realign-optimized prologue costs, and
   dump-text continuations are missing; confirm upstream before editing.  */
1132 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1133 bool add_realign_cost
, unsigned int *inside_cost
,
1134 unsigned int *prologue_cost
,
1135 stmt_vector_for_cost
*prologue_cost_vec
,
1136 stmt_vector_for_cost
*body_cost_vec
,
1137 bool record_prologue_costs
)
1139 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1140 gimple stmt
= DR_STMT (dr
);
1141 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Cost depends on the alignment scheme the target will use.  */
1143 switch (alignment_support_scheme
)
1147 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1148 stmt_info
, 0, vect_body
);
1150 if (dump_enabled_p ())
1151 dump_printf_loc (MSG_NOTE
, vect_location
,
1152 "vect_model_load_cost: aligned.\n");
1156 case dr_unaligned_supported
:
1158 /* Here, we assign an additional cost for the unaligned load. */
1159 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1160 unaligned_load
, stmt_info
,
1161 DR_MISALIGNMENT (dr
), vect_body
);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE
, vect_location
,
1165 "vect_model_load_cost: unaligned supported by "
1170 case dr_explicit_realign
:
/* Explicit realign: two loads plus a permute per copy, and possibly a
   mask computation if the target provides builtin_mask_for_load.  */
1172 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1173 vector_load
, stmt_info
, 0, vect_body
);
1174 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1175 vec_perm
, stmt_info
, 0, vect_body
);
1177 /* FIXME: If the misalignment remains fixed across the iterations of
1178 the containing loop, the following cost should be added to the
1180 if (targetm
.vectorize
.builtin_mask_for_load
)
1181 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1182 stmt_info
, 0, vect_body
);
1184 if (dump_enabled_p ())
1185 dump_printf_loc (MSG_NOTE
, vect_location
,
1186 "vect_model_load_cost: explicit realign\n");
1190 case dr_explicit_realign_optimized
:
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE
, vect_location
,
1194 "vect_model_load_cost: unaligned software "
1197 /* Unaligned software pipeline has a load of an address, an initial
1198 load, and possibly a mask operation to "prime" the loop. However,
1199 if this is an access in a group of loads, which provide grouped
1200 access, then the above cost should only be considered for one
1201 access in the group. Inside the loop, there is a load op
1202 and a realignment op. */
1204 if (add_realign_cost
&& record_prologue_costs
)
1206 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1207 vector_stmt
, stmt_info
,
1209 if (targetm
.vectorize
.builtin_mask_for_load
)
1210 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1211 vector_stmt
, stmt_info
,
/* Per-iteration: one load and one realignment permute.  */
1215 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1216 stmt_info
, 0, vect_body
);
1217 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1218 stmt_info
, 0, vect_body
);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_model_load_cost: explicit realign optimized"
1228 case dr_unaligned_unsupported
:
/* Unsupported: pin the cost at the maximum so this choice loses.  */
1230 *inside_cost
= VECT_MAX_COST
;
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1234 "vect_model_load_cost: unsupported access.\n");
1243 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1244 the loop preheader for the vectorized stmt STMT. */
1247 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1250 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1253 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1254 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1258 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1262 if (nested_in_vect_loop_p (loop
, stmt
))
1265 pe
= loop_preheader_edge (loop
);
1266 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1267 gcc_assert (!new_bb
);
1271 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1273 gimple_stmt_iterator gsi_bb_start
;
1275 gcc_assert (bb_vinfo
);
1276 bb
= BB_VINFO_BB (bb_vinfo
);
1277 gsi_bb_start
= gsi_after_labels (bb
);
1278 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1282 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "created new init_stmt: ");
1286 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1287 dump_printf (MSG_NOTE
, "\n");
1291 /* Function vect_init_vector.
1293 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1294 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1295 vector type a vector with all elements equal to VAL is created first.
1296 Place the initialization at BSI if it is not NULL. Otherwise, place the
1297 initialization at the loop preheader.
1298 Return the DEF of INIT_STMT.
1299 It will be used in the vectorization of STMT. */
1302 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1309 if (TREE_CODE (type
) == VECTOR_TYPE
1310 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1312 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1314 if (CONSTANT_CLASS_P (val
))
1315 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1318 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1319 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1322 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1326 val
= build_vector_from_val (type
, val
);
1329 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1330 init_stmt
= gimple_build_assign (new_var
, val
);
1331 new_temp
= make_ssa_name (new_var
, init_stmt
);
1332 gimple_assign_set_lhs (init_stmt
, new_temp
);
1333 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1334 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1339 /* Function vect_get_vec_def_for_operand.
1341 OP is an operand in STMT. This function returns a (vector) def that will be
1342 used in the vectorized stmt for STMT.
1344 In the case that OP is an SSA_NAME which is defined in the loop, then
1345 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1347 In case OP is an invariant or constant, a new stmt that creates a vector def
1348 needs to be introduced. */
1351 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1356 stmt_vec_info def_stmt_info
= NULL
;
1357 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1358 unsigned int nunits
;
1359 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1361 enum vect_def_type dt
;
1365 if (dump_enabled_p ())
1367 dump_printf_loc (MSG_NOTE
, vect_location
,
1368 "vect_get_vec_def_for_operand: ");
1369 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1370 dump_printf (MSG_NOTE
, "\n");
1373 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1374 &def_stmt
, &def
, &dt
);
1375 gcc_assert (is_simple_use
);
1376 if (dump_enabled_p ())
1378 int loc_printed
= 0;
1381 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1383 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1384 dump_printf (MSG_NOTE
, "\n");
1389 dump_printf (MSG_NOTE
, " def_stmt = ");
1391 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1392 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1393 dump_printf (MSG_NOTE
, "\n");
1399 /* Case 1: operand is a constant. */
1400 case vect_constant_def
:
1402 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1403 gcc_assert (vector_type
);
1404 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1409 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1410 if (dump_enabled_p ())
1411 dump_printf_loc (MSG_NOTE
, vect_location
,
1412 "Create vector_cst. nunits = %d\n", nunits
);
1414 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1417 /* Case 2: operand is defined outside the loop - loop invariant. */
1418 case vect_external_def
:
1420 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1421 gcc_assert (vector_type
);
1426 /* Create 'vec_inv = {inv,inv,..,inv}' */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1430 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1433 /* Case 3: operand is defined inside the loop. */
1434 case vect_internal_def
:
1437 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1439 /* Get the def from the vectorized stmt. */
1440 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1442 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1443 /* Get vectorized pattern statement. */
1445 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1446 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1447 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1448 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1449 gcc_assert (vec_stmt
);
1450 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1451 vec_oprnd
= PHI_RESULT (vec_stmt
);
1452 else if (is_gimple_call (vec_stmt
))
1453 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1455 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1459 /* Case 4: operand is defined by a loop header phi - reduction */
1460 case vect_reduction_def
:
1461 case vect_double_reduction_def
:
1462 case vect_nested_cycle
:
1466 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1467 loop
= (gimple_bb (def_stmt
))->loop_father
;
1469 /* Get the def before the loop */
1470 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1471 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1474 /* Case 5: operand is defined by loop-header phi - induction. */
1475 case vect_induction_def
:
1477 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1481 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1482 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1483 vec_oprnd
= PHI_RESULT (vec_stmt
);
1485 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1495 /* Function vect_get_vec_def_for_stmt_copy
1497 Return a vector-def for an operand. This function is used when the
1498 vectorized stmt to be created (by the caller to this function) is a "copy"
1499 created in case the vectorized result cannot fit in one vector, and several
1500 copies of the vector-stmt are required. In this case the vector-def is
1501 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1502 of the stmt that defines VEC_OPRND.
1503 DT is the type of the vector def VEC_OPRND.
1506 In case the vectorization factor (VF) is bigger than the number
1507 of elements that can fit in a vectype (nunits), we have to generate
1508 more than one vector stmt to vectorize the scalar stmt. This situation
1509 arises when there are multiple data-types operated upon in the loop; the
1510 smallest data-type determines the VF, and as a result, when vectorizing
1511 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1512 vector stmt (each computing a vector of 'nunits' results, and together
1513 computing 'VF' results in each iteration). This function is called when
1514 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1515 which VF=16 and nunits=4, so the number of copies required is 4):
1517 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1519 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1520 VS1.1: vx.1 = memref1 VS1.2
1521 VS1.2: vx.2 = memref2 VS1.3
1522 VS1.3: vx.3 = memref3
1524 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1525 VSnew.1: vz1 = vx.1 + ... VSnew.2
1526 VSnew.2: vz2 = vx.2 + ... VSnew.3
1527 VSnew.3: vz3 = vx.3 + ...
1529 The vectorization of S1 is explained in vectorizable_load.
1530 The vectorization of S2:
1531 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1532 the function 'vect_get_vec_def_for_operand' is called to
1533 get the relevant vector-def for each operand of S2. For operand x it
1534 returns the vector-def 'vx.0'.
1536 To create the remaining copies of the vector-stmt (VSnew.j), this
1537 function is called to get the relevant vector-def for each operand. It is
1538 obtained from the respective VS1.j stmt, which is recorded in the
1539 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1541 For example, to obtain the vector-def 'vx.1' in order to create the
1542 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1543 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1544 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1545 and return its def ('vx.1').
1546 Overall, to create the above sequence this function will be called 3 times:
1547 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1548 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1549 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1552 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1554 gimple vec_stmt_for_operand
;
1555 stmt_vec_info def_stmt_info
;
1557 /* Do nothing; can reuse same def. */
1558 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1561 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1562 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1563 gcc_assert (def_stmt_info
);
1564 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1565 gcc_assert (vec_stmt_for_operand
);
1566 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1567 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1568 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1570 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1575 /* Get vectorized definitions for the operands to create a copy of an original
1576 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1579 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1580 vec
<tree
> *vec_oprnds0
,
1581 vec
<tree
> *vec_oprnds1
)
1583 tree vec_oprnd
= vec_oprnds0
->pop ();
1585 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1586 vec_oprnds0
->quick_push (vec_oprnd
);
1588 if (vec_oprnds1
&& vec_oprnds1
->length ())
1590 vec_oprnd
= vec_oprnds1
->pop ();
1591 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1592 vec_oprnds1
->quick_push (vec_oprnd
);
1597 /* Get vectorized definitions for OP0 and OP1.
1598 REDUC_INDEX is the index of reduction operand in case of reduction,
1599 and -1 otherwise. */
1602 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1603 vec
<tree
> *vec_oprnds0
,
1604 vec
<tree
> *vec_oprnds1
,
1605 slp_tree slp_node
, int reduc_index
)
1609 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1610 auto_vec
<tree
> ops (nops
);
1611 auto_vec
<vec
<tree
> > vec_defs (nops
);
1613 ops
.quick_push (op0
);
1615 ops
.quick_push (op1
);
1617 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1619 *vec_oprnds0
= vec_defs
[0];
1621 *vec_oprnds1
= vec_defs
[1];
1627 vec_oprnds0
->create (1);
1628 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1629 vec_oprnds0
->quick_push (vec_oprnd
);
1633 vec_oprnds1
->create (1);
1634 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1635 vec_oprnds1
->quick_push (vec_oprnd
);
1641 /* Function vect_finish_stmt_generation.
1643 Insert a new stmt. */
1646 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1647 gimple_stmt_iterator
*gsi
)
1649 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1650 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1651 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1653 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1655 if (!gsi_end_p (*gsi
)
1656 && gimple_has_mem_ops (vec_stmt
))
1658 gimple at_stmt
= gsi_stmt (*gsi
);
1659 tree vuse
= gimple_vuse (at_stmt
);
1660 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1662 tree vdef
= gimple_vdef (at_stmt
);
1663 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1669 && ((is_gimple_assign (vec_stmt
)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1671 || (is_gimple_call (vec_stmt
)
1672 && !(gimple_call_flags (vec_stmt
)
1673 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1675 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1676 gimple_set_vdef (vec_stmt
, new_vdef
);
1677 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1681 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1683 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1686 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1689 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1690 dump_printf (MSG_NOTE
, "\n");
1693 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1696 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1697 a function declaration if the target has a vectorized version
1698 of the function, or NULL_TREE if the function cannot be vectorized. */
1701 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1703 tree fndecl
= gimple_call_fndecl (call
);
1705 /* We only handle functions that do not read or clobber memory -- i.e.
1706 const or novops ones. */
1707 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1711 || TREE_CODE (fndecl
) != FUNCTION_DECL
1712 || !DECL_BUILT_IN (fndecl
))
1715 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1720 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1721 gimple_stmt_iterator
*);
1724 /* Function vectorizable_mask_load_store.
1726 Check if STMT performs a conditional load or store that can be vectorized.
1727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1728 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1732 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1733 gimple
*vec_stmt
, slp_tree slp_node
)
1735 tree vec_dest
= NULL
;
1736 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1737 stmt_vec_info prev_stmt_info
;
1738 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1739 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1740 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1741 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1742 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1746 tree dataref_ptr
= NULL_TREE
;
1748 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1752 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1753 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1754 int gather_scale
= 1;
1755 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1760 enum vect_def_type dt
;
1762 if (slp_node
!= NULL
)
1765 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1766 gcc_assert (ncopies
>= 1);
1768 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1769 mask
= gimple_call_arg (stmt
, 2);
1770 if (TYPE_PRECISION (TREE_TYPE (mask
))
1771 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1774 /* FORNOW. This restriction should be relaxed. */
1775 if (nested_in_vect_loop
&& ncopies
> 1)
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1779 "multiple types in nested loop.");
1783 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1786 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1789 if (!STMT_VINFO_DATA_REF (stmt_info
))
1792 elem_type
= TREE_TYPE (vectype
);
1794 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1797 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1800 if (STMT_VINFO_GATHER_P (stmt_info
))
1804 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1805 &gather_off
, &gather_scale
);
1806 gcc_assert (gather_decl
);
1807 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1808 &def_stmt
, &def
, &gather_dt
,
1809 &gather_off_vectype
))
1811 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1813 "gather index use not simple.");
1817 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1819 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1820 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1824 "masked gather with integer mask not supported.");
1828 else if (tree_int_cst_compare (nested_in_vect_loop
1829 ? STMT_VINFO_DR_STEP (stmt_info
)
1830 : DR_STEP (dr
), size_zero_node
) <= 0)
1832 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1833 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1836 if (TREE_CODE (mask
) != SSA_NAME
)
1839 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1840 &def_stmt
, &def
, &dt
))
1845 tree rhs
= gimple_call_arg (stmt
, 3);
1846 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1847 &def_stmt
, &def
, &dt
))
1851 if (!vec_stmt
) /* transformation not required. */
1853 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1855 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1858 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1864 if (STMT_VINFO_GATHER_P (stmt_info
))
1866 tree vec_oprnd0
= NULL_TREE
, op
;
1867 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1868 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1869 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1870 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1871 tree mask_perm_mask
= NULL_TREE
;
1872 edge pe
= loop_preheader_edge (loop
);
1875 enum { NARROW
, NONE
, WIDEN
} modifier
;
1876 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1878 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1879 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1880 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1881 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1882 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1883 scaletype
= TREE_VALUE (arglist
);
1884 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1885 && types_compatible_p (srctype
, masktype
));
1887 if (nunits
== gather_off_nunits
)
1889 else if (nunits
== gather_off_nunits
/ 2)
1891 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1894 for (i
= 0; i
< gather_off_nunits
; ++i
)
1895 sel
[i
] = i
| nunits
;
1897 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
1898 gcc_assert (perm_mask
!= NULL_TREE
);
1900 else if (nunits
== gather_off_nunits
* 2)
1902 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1905 for (i
= 0; i
< nunits
; ++i
)
1906 sel
[i
] = i
< gather_off_nunits
1907 ? i
: i
+ nunits
- gather_off_nunits
;
1909 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
1910 gcc_assert (perm_mask
!= NULL_TREE
);
1912 for (i
= 0; i
< nunits
; ++i
)
1913 sel
[i
] = i
| gather_off_nunits
;
1914 mask_perm_mask
= vect_gen_perm_mask (masktype
, sel
);
1915 gcc_assert (mask_perm_mask
!= NULL_TREE
);
1920 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1922 ptr
= fold_convert (ptrtype
, gather_base
);
1923 if (!is_gimple_min_invariant (ptr
))
1925 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1926 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1927 gcc_assert (!new_bb
);
1930 scale
= build_int_cst (scaletype
, gather_scale
);
1932 prev_stmt_info
= NULL
;
1933 for (j
= 0; j
< ncopies
; ++j
)
1935 if (modifier
== WIDEN
&& (j
& 1))
1936 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1937 perm_mask
, stmt
, gsi
);
1940 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1943 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1945 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1947 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1948 == TYPE_VECTOR_SUBPARTS (idxtype
));
1949 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1950 var
= make_ssa_name (var
, NULL
);
1951 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1953 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
1955 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1959 if (mask_perm_mask
&& (j
& 1))
1960 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1961 mask_perm_mask
, stmt
, gsi
);
1965 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
1968 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
1969 &def_stmt
, &def
, &dt
);
1970 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
1974 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
1976 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
1977 == TYPE_VECTOR_SUBPARTS (masktype
));
1978 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
1980 var
= make_ssa_name (var
, NULL
);
1981 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
1983 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
1984 mask_op
, NULL_TREE
);
1985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1991 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
1994 if (!useless_type_conversion_p (vectype
, rettype
))
1996 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
1997 == TYPE_VECTOR_SUBPARTS (rettype
));
1998 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
1999 op
= make_ssa_name (var
, new_stmt
);
2000 gimple_call_set_lhs (new_stmt
, op
);
2001 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2002 var
= make_ssa_name (vec_dest
, NULL
);
2003 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2005 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
2010 var
= make_ssa_name (vec_dest
, new_stmt
);
2011 gimple_call_set_lhs (new_stmt
, var
);
2014 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2016 if (modifier
== NARROW
)
2023 var
= permute_vec_elements (prev_res
, var
,
2024 perm_mask
, stmt
, gsi
);
2025 new_stmt
= SSA_NAME_DEF_STMT (var
);
2028 if (prev_stmt_info
== NULL
)
2029 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2031 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2032 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2038 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2039 prev_stmt_info
= NULL
;
2040 for (i
= 0; i
< ncopies
; i
++)
2042 unsigned align
, misalign
;
2046 tree rhs
= gimple_call_arg (stmt
, 3);
2047 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2048 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2049 /* We should have catched mismatched types earlier. */
2050 gcc_assert (useless_type_conversion_p (vectype
,
2051 TREE_TYPE (vec_rhs
)));
2052 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2053 NULL_TREE
, &dummy
, gsi
,
2054 &ptr_incr
, false, &inv_p
);
2055 gcc_assert (!inv_p
);
2059 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2061 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2062 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2064 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2065 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2066 TYPE_SIZE_UNIT (vectype
));
2069 align
= TYPE_ALIGN_UNIT (vectype
);
2070 if (aligned_access_p (dr
))
2072 else if (DR_MISALIGNMENT (dr
) == -1)
2074 align
= TYPE_ALIGN_UNIT (elem_type
);
2078 misalign
= DR_MISALIGNMENT (dr
);
2079 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2082 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2083 gimple_call_arg (stmt
, 1),
2085 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2087 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2090 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2095 tree vec_mask
= NULL_TREE
;
2096 prev_stmt_info
= NULL
;
2097 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2098 for (i
= 0; i
< ncopies
; i
++)
2100 unsigned align
, misalign
;
2104 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2105 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2106 NULL_TREE
, &dummy
, gsi
,
2107 &ptr_incr
, false, &inv_p
);
2108 gcc_assert (!inv_p
);
2112 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2114 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2115 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2116 TYPE_SIZE_UNIT (vectype
));
2119 align
= TYPE_ALIGN_UNIT (vectype
);
2120 if (aligned_access_p (dr
))
2122 else if (DR_MISALIGNMENT (dr
) == -1)
2124 align
= TYPE_ALIGN_UNIT (elem_type
);
2128 misalign
= DR_MISALIGNMENT (dr
);
2129 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2132 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2133 gimple_call_arg (stmt
, 1),
2135 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
, NULL
));
2136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2138 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2140 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2141 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2149 /* Function vectorizable_call.
2151 Check if STMT performs a function call that can be vectorized.
2152 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2153 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2154 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2157 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2163 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2164 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2165 tree vectype_out
, vectype_in
;
2168 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2169 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2170 tree fndecl
, new_temp
, def
, rhs_type
;
2172 enum vect_def_type dt
[3]
2173 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2174 gimple new_stmt
= NULL
;
2176 vec
<tree
> vargs
= vNULL
;
2177 enum { NARROW
, NONE
, WIDEN
} modifier
;
2181 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2184 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2187 /* Is STMT a vectorizable call? */
2188 if (!is_gimple_call (stmt
))
2191 if (gimple_call_internal_p (stmt
)
2192 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2193 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2194 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2197 if (gimple_call_lhs (stmt
) == NULL_TREE
2198 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2201 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2203 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2205 /* Process function arguments. */
2206 rhs_type
= NULL_TREE
;
2207 vectype_in
= NULL_TREE
;
2208 nargs
= gimple_call_num_args (stmt
);
2210 /* Bail out if the function has more than three arguments, we do not have
2211 interesting builtin functions to vectorize with more than two arguments
2212 except for fma. No arguments is also not good. */
2213 if (nargs
== 0 || nargs
> 3)
2216 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2217 if (gimple_call_internal_p (stmt
)
2218 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2221 rhs_type
= unsigned_type_node
;
2224 for (i
= 0; i
< nargs
; i
++)
2228 op
= gimple_call_arg (stmt
, i
);
2230 /* We can only handle calls with arguments of the same type. */
2232 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2234 if (dump_enabled_p ())
2235 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2236 "argument types differ.\n");
2240 rhs_type
= TREE_TYPE (op
);
2242 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2243 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2247 "use not simple.\n");
2252 vectype_in
= opvectype
;
2254 && opvectype
!= vectype_in
)
2256 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2258 "argument vector types differ.\n");
2262 /* If all arguments are external or constant defs use a vector type with
2263 the same size as the output vector type. */
2265 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2267 gcc_assert (vectype_in
);
2270 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2273 "no vectype for scalar type ");
2274 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2275 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2282 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2283 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2284 if (nunits_in
== nunits_out
/ 2)
2286 else if (nunits_out
== nunits_in
)
2288 else if (nunits_out
== nunits_in
/ 2)
2293 /* For now, we only vectorize functions if a target specific builtin
2294 is available. TODO -- in some cases, it might be profitable to
2295 insert the calls for pieces of the vector, in order to be able
2296 to vectorize other operations in the loop. */
2297 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2298 if (fndecl
== NULL_TREE
)
2300 if (gimple_call_internal_p (stmt
)
2301 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2304 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2305 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2306 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2307 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2309 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2310 { 0, 1, 2, ... vf - 1 } vector. */
2311 gcc_assert (nargs
== 0);
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2317 "function is not vectorizable.\n");
2322 gcc_assert (!gimple_vuse (stmt
));
2324 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2326 else if (modifier
== NARROW
)
2327 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2329 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2331 /* Sanity check: make sure that at least one copy of the vectorized stmt
2332 needs to be generated. */
2333 gcc_assert (ncopies
>= 1);
2335 if (!vec_stmt
) /* transformation not required. */
2337 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2341 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2351 scalar_dest
= gimple_call_lhs (stmt
);
2352 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2354 prev_stmt_info
= NULL
;
2358 for (j
= 0; j
< ncopies
; ++j
)
2360 /* Build argument list for the vectorized call. */
2362 vargs
.create (nargs
);
2368 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2369 vec
<tree
> vec_oprnds0
;
2371 for (i
= 0; i
< nargs
; i
++)
2372 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2373 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2374 vec_oprnds0
= vec_defs
[0];
2376 /* Arguments are ready. Create the new vector stmt. */
2377 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2380 for (k
= 0; k
< nargs
; k
++)
2382 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2383 vargs
[k
] = vec_oprndsk
[i
];
2385 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2386 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2387 gimple_call_set_lhs (new_stmt
, new_temp
);
2388 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2389 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2392 for (i
= 0; i
< nargs
; i
++)
2394 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2395 vec_oprndsi
.release ();
2400 for (i
= 0; i
< nargs
; i
++)
2402 op
= gimple_call_arg (stmt
, i
);
2405 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2408 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2410 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2413 vargs
.quick_push (vec_oprnd0
);
2416 if (gimple_call_internal_p (stmt
)
2417 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2419 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2421 for (k
= 0; k
< nunits_out
; ++k
)
2422 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2423 tree cst
= build_vector (vectype_out
, v
);
2425 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2426 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2427 new_temp
= make_ssa_name (new_var
, init_stmt
);
2428 gimple_assign_set_lhs (init_stmt
, new_temp
);
2429 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2430 new_temp
= make_ssa_name (vec_dest
, NULL
);
2431 new_stmt
= gimple_build_assign (new_temp
,
2432 gimple_assign_lhs (init_stmt
));
2436 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2437 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2438 gimple_call_set_lhs (new_stmt
, new_temp
);
2440 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2443 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2445 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2447 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2453 for (j
= 0; j
< ncopies
; ++j
)
2455 /* Build argument list for the vectorized call. */
2457 vargs
.create (nargs
* 2);
2463 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2464 vec
<tree
> vec_oprnds0
;
2466 for (i
= 0; i
< nargs
; i
++)
2467 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2468 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2469 vec_oprnds0
= vec_defs
[0];
2471 /* Arguments are ready. Create the new vector stmt. */
2472 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2476 for (k
= 0; k
< nargs
; k
++)
2478 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2479 vargs
.quick_push (vec_oprndsk
[i
]);
2480 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2482 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2483 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2484 gimple_call_set_lhs (new_stmt
, new_temp
);
2485 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2486 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2489 for (i
= 0; i
< nargs
; i
++)
2491 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2492 vec_oprndsi
.release ();
2497 for (i
= 0; i
< nargs
; i
++)
2499 op
= gimple_call_arg (stmt
, i
);
2503 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2505 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2509 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2511 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2513 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2516 vargs
.quick_push (vec_oprnd0
);
2517 vargs
.quick_push (vec_oprnd1
);
2520 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2521 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2522 gimple_call_set_lhs (new_stmt
, new_temp
);
2523 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2526 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2528 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2530 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2533 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2538 /* No current target implements this case. */
2544 /* The call in STMT might prevent it from being removed in dce.
2545 We however cannot remove it here, due to the way the ssa name
2546 it defines is mapped to the new definition. So just replace
2547 rhs of the statement with something harmless. */
2552 type
= TREE_TYPE (scalar_dest
);
2553 if (is_pattern_stmt_p (stmt_info
))
2554 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2556 lhs
= gimple_call_lhs (stmt
);
2557 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2558 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2559 set_vinfo_for_stmt (stmt
, NULL
);
2560 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2561 gsi_replace (gsi
, new_stmt
, false);
/* Per-call-argument bookkeeping used by vectorizable_simd_clone_call.
   NOTE(review): this extraction is missing several members (e.g. the
   vectype/op/align fields referenced later via thisarginfo.vectype,
   thisarginfo.op, thisarginfo.align) and the closing brace -- confirm
   against the complete source.  */
2567 struct simd_call_arg_info
/* Definition kind of the argument (constant/external/internal ...).  */
2571 enum vect_def_type dt
;
/* Step of a linear (induction) argument; 0 when the argument is not
   detected as linear.  */
2572 HOST_WIDE_INT linear_step
;
2576 /* Function vectorizable_simd_clone_call.
2578 Check if STMT performs a function call that can be vectorized
2579 by calling a simd clone of the function.
2580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2581 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Check whether the call in STMT can be vectorized by calling a SIMD
   clone of the callee; if VEC_STMT is non-null also emit the
   transformed code.  NOTE(review): this extraction has dropped many
   physical lines (braces, returns, several expressions), so the
   comments below describe only what the surviving lines show.  */
2585 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2586 gimple
*vec_stmt
, slp_tree slp_node
)
2591 tree vec_oprnd0
= NULL_TREE
;
2592 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2594 unsigned int nunits
;
2595 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2596 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2597 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2598 tree fndecl
, new_temp
, def
;
2600 gimple new_stmt
= NULL
;
2602 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2603 vec
<tree
> vargs
= vNULL
;
2605 tree lhs
, rtype
, ratype
;
2606 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2608 /* Is STMT a vectorizable call? */
2609 if (!is_gimple_call (stmt
))
2612 fndecl
= gimple_call_fndecl (stmt
);
2613 if (fndecl
== NULL_TREE
)
/* Only direct calls whose callee has registered SIMD clones are
   candidates.  */
2616 struct cgraph_node
*node
= cgraph_get_node (fndecl
);
2617 if (node
== NULL
|| node
->simd_clones
== NULL
)
2620 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2623 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2626 if (gimple_call_lhs (stmt
)
2627 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2630 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2632 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2634 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2638 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2641 /* Process function arguments. */
2642 nargs
= gimple_call_num_args (stmt
);
2644 /* Bail out if the function has zero arguments. */
2648 arginfo
.create (nargs
);
/* Classify each argument: simple-use kind, linearity (via simple_iv),
   and pointer alignment for constant/external pointer arguments.  */
2650 for (i
= 0; i
< nargs
; i
++)
2652 simd_call_arg_info thisarginfo
;
2655 thisarginfo
.linear_step
= 0;
2656 thisarginfo
.align
= 0;
2657 thisarginfo
.op
= NULL_TREE
;
2659 op
= gimple_call_arg (stmt
, i
);
2660 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2661 &def_stmt
, &def
, &thisarginfo
.dt
,
2662 &thisarginfo
.vectype
)
2663 || thisarginfo
.dt
== vect_uninitialized_def
)
2665 if (dump_enabled_p ())
2666 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2667 "use not simple.\n");
2672 if (thisarginfo
.dt
== vect_constant_def
2673 || thisarginfo
.dt
== vect_external_def
)
2674 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2676 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
/* An SSA-name argument that is a simple induction with a constant
   (shwi-fitting) step is recorded as linear.  */
2678 if (thisarginfo
.dt
!= vect_constant_def
2679 && thisarginfo
.dt
!= vect_external_def
2681 && TREE_CODE (op
) == SSA_NAME
2682 && simple_iv (loop
, loop_containing_stmt (stmt
), op
, &iv
, false)
2683 && tree_fits_shwi_p (iv
.step
))
2685 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2686 thisarginfo
.op
= iv
.base
;
2688 else if ((thisarginfo
.dt
== vect_constant_def
2689 || thisarginfo
.dt
== vect_external_def
)
2690 && POINTER_TYPE_P (TREE_TYPE (op
)))
2691 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2693 arginfo
.quick_push (thisarginfo
);
/* Choose the best-matching SIMD clone by scoring each candidate
   ("badness"): penalize shorter simdlen, inbranch clones, and
   target-reported unsuitability.  */
2696 unsigned int badness
= 0;
2697 struct cgraph_node
*bestn
= NULL
;
2698 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
))
2699 bestn
= cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
));
2701 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2702 n
= n
->simdclone
->next_clone
)
2704 unsigned int this_badness
= 0;
2705 if (n
->simdclone
->simdlen
2706 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2707 || n
->simdclone
->nargs
!= nargs
)
2709 if (n
->simdclone
->simdlen
2710 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2711 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2712 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2713 if (n
->simdclone
->inbranch
)
2714 this_badness
+= 2048;
2715 int target_badness
= targetm
.simd_clone
.usable (n
);
2716 if (target_badness
< 0)
2718 this_badness
+= target_badness
* 512;
2719 /* FORNOW: Have to add code to add the mask argument. */
2720 if (n
->simdclone
->inbranch
)
/* Verify each argument is compatible with the clone's declared
   argument kind.  */
2722 for (i
= 0; i
< nargs
; i
++)
2724 switch (n
->simdclone
->args
[i
].arg_type
)
2726 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2727 if (!useless_type_conversion_p
2728 (n
->simdclone
->args
[i
].orig_type
,
2729 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2731 else if (arginfo
[i
].dt
== vect_constant_def
2732 || arginfo
[i
].dt
== vect_external_def
2733 || arginfo
[i
].linear_step
)
2736 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2737 if (arginfo
[i
].dt
!= vect_constant_def
2738 && arginfo
[i
].dt
!= vect_external_def
)
2741 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2742 if (arginfo
[i
].dt
== vect_constant_def
2743 || arginfo
[i
].dt
== vect_external_def
2744 || (arginfo
[i
].linear_step
2745 != n
->simdclone
->args
[i
].linear_step
))
2748 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2752 case SIMD_CLONE_ARG_TYPE_MASK
:
2755 if (i
== (size_t) -1)
2757 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2762 if (arginfo
[i
].align
)
2763 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2764 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2766 if (i
== (size_t) -1)
2768 if (bestn
== NULL
|| this_badness
< badness
)
2771 badness
= this_badness
;
/* For constant/external args passed to VECTOR-kind clone parameters,
   compute the vector type they will be broadcast into.  */
2781 for (i
= 0; i
< nargs
; i
++)
2782 if ((arginfo
[i
].dt
== vect_constant_def
2783 || arginfo
[i
].dt
== vect_external_def
)
2784 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2787 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2789 if (arginfo
[i
].vectype
== NULL
2790 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2791 > bestn
->simdclone
->simdlen
))
2798 fndecl
= bestn
->decl
;
2799 nunits
= bestn
->simdclone
->simdlen
;
2800 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2802 /* If the function isn't const, only allow it in simd loops where user
2803 has asserted that at least nunits consecutive iterations can be
2804 performed using SIMD instructions. */
2805 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2806 && gimple_vuse (stmt
))
2812 /* Sanity check: make sure that at least one copy of the vectorized stmt
2813 needs to be generated. */
2814 gcc_assert (ncopies
>= 1);
2816 if (!vec_stmt
) /* transformation not required. */
2818 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info
) = bestn
->decl
;
2819 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_NOTE
, vect_location
,
2822 "=== vectorizable_simd_clone_call ===\n");
2823 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
/* Transformation phase: build the vectorized call(s).  */
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2834 scalar_dest
= gimple_call_lhs (stmt
);
2835 vec_dest
= NULL_TREE
;
2840 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* RATYPE is set when the clone returns an array (of vectors); RTYPE
   is then its element type.  */
2841 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2842 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2845 rtype
= TREE_TYPE (ratype
);
2849 prev_stmt_info
= NULL
;
2850 for (j
= 0; j
< ncopies
; ++j
)
2852 /* Build argument list for the vectorized call. */
2854 vargs
.create (nargs
);
2858 for (i
= 0; i
< nargs
; i
++)
2860 unsigned int k
, l
, m
, o
;
2862 op
= gimple_call_arg (stmt
, i
);
2863 switch (bestn
->simdclone
->args
[i
].arg_type
)
2865 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2866 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2867 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2868 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
/* If the clone's vector argument is narrower than the operand's
   vector type, extract pieces with BIT_FIELD_REF; otherwise widen by
   building a CONSTRUCTOR of several defs.  */
2870 if (TYPE_VECTOR_SUBPARTS (atype
)
2871 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2873 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2874 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2875 / TYPE_VECTOR_SUBPARTS (atype
));
2876 gcc_assert ((k
& (k
- 1)) == 0);
2879 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2882 vec_oprnd0
= arginfo
[i
].op
;
2883 if ((m
& (k
- 1)) == 0)
2885 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2888 arginfo
[i
].op
= vec_oprnd0
;
2890 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2892 bitsize_int ((m
& (k
- 1)) * prec
));
2894 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2896 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2897 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2901 k
= (TYPE_VECTOR_SUBPARTS (atype
)
2902 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
2903 gcc_assert ((k
& (k
- 1)) == 0);
2904 vec
<constructor_elt
, va_gc
> *ctor_elts
;
2906 vec_alloc (ctor_elts
, k
);
2909 for (l
= 0; l
< k
; l
++)
2911 if (m
== 0 && l
== 0)
2913 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2916 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2918 arginfo
[i
].op
= vec_oprnd0
;
2921 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
2925 vargs
.safe_push (vec_oprnd0
);
2928 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
2930 = gimple_build_assign (make_ssa_name (atype
, NULL
),
2932 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2933 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2938 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
/* Uniform arguments are passed through unchanged.  */
2939 vargs
.safe_push (op
);
2941 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
/* First copy: materialize the linear base in the preheader and build
   a PHI advancing it by linear_step * ncopies * nunits per
   iteration.  */
2946 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
2951 edge pe
= loop_preheader_edge (loop
);
2952 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2953 gcc_assert (!new_bb
);
2955 tree phi_res
= copy_ssa_name (op
, NULL
);
2956 gimple new_phi
= create_phi_node (phi_res
, loop
->header
);
2957 set_vinfo_for_stmt (new_phi
,
2958 new_stmt_vec_info (new_phi
, loop_vinfo
,
2960 add_phi_arg (new_phi
, arginfo
[i
].op
,
2961 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
2963 = POINTER_TYPE_P (TREE_TYPE (op
))
2964 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
2965 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
2966 ? sizetype
: TREE_TYPE (op
);
2968 = double_int::from_shwi
2969 (bestn
->simdclone
->args
[i
].linear_step
);
2970 cst
*= double_int::from_uhwi (ncopies
* nunits
);
2971 tree tcst
= double_int_to_tree (type
, cst
);
2972 tree phi_arg
= copy_ssa_name (op
, NULL
);
2973 new_stmt
= gimple_build_assign_with_ops (code
, phi_arg
,
2975 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
2976 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
2977 set_vinfo_for_stmt (new_stmt
,
2978 new_stmt_vec_info (new_stmt
, loop_vinfo
,
2980 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
2982 arginfo
[i
].op
= phi_res
;
2983 vargs
.safe_push (phi_res
);
/* Subsequent copies: offset the base by j * nunits * linear_step.  */
2988 = POINTER_TYPE_P (TREE_TYPE (op
))
2989 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
2990 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
2991 ? sizetype
: TREE_TYPE (op
);
2993 = double_int::from_shwi
2994 (bestn
->simdclone
->args
[i
].linear_step
);
2995 cst
*= double_int::from_uhwi (j
* nunits
);
2996 tree tcst
= double_int_to_tree (type
, cst
);
2997 new_temp
= make_ssa_name (TREE_TYPE (op
), NULL
);
2999 = gimple_build_assign_with_ops (code
, new_temp
,
3000 arginfo
[i
].op
, tcst
);
3001 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3002 vargs
.safe_push (new_temp
);
3005 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
/* Emit the call to the chosen SIMD clone.  */
3011 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3014 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3016 new_temp
= create_tmp_var (ratype
, NULL
);
3017 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3018 == TYPE_VECTOR_SUBPARTS (rtype
))
3019 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3021 new_temp
= make_ssa_name (rtype
, new_stmt
);
3022 gimple_call_set_lhs (new_stmt
, new_temp
);
3024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Split a too-wide return value into VECTYPE-sized pieces (via
   MEM_REF on the array return, or BIT_FIELD_REF), then clobber the
   temporary array.  */
3028 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3031 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3032 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3033 gcc_assert ((k
& (k
- 1)) == 0);
3034 for (l
= 0; l
< k
; l
++)
3039 t
= build_fold_addr_expr (new_temp
);
3040 t
= build2 (MEM_REF
, vectype
, t
,
3041 build_int_cst (TREE_TYPE (t
),
3042 l
* prec
/ BITS_PER_UNIT
));
3045 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3046 size_int (prec
), bitsize_int (l
* prec
));
3048 = gimple_build_assign (make_ssa_name (vectype
, NULL
), t
);
3049 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3050 if (j
== 0 && l
== 0)
3051 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3053 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3055 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3060 tree clobber
= build_constructor (ratype
, NULL
);
3061 TREE_THIS_VOLATILE (clobber
) = 1;
3062 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3063 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Narrow return: accumulate k partial results in a CONSTRUCTOR and
   emit one vector assignment every k-th copy.  */
3067 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3069 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3070 / TYPE_VECTOR_SUBPARTS (rtype
));
3071 gcc_assert ((k
& (k
- 1)) == 0);
3072 if ((j
& (k
- 1)) == 0)
3073 vec_alloc (ret_ctor_elts
, k
);
3076 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3077 for (m
= 0; m
< o
; m
++)
3079 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3080 size_int (m
), NULL_TREE
, NULL_TREE
);
3082 = gimple_build_assign (make_ssa_name (rtype
, NULL
),
3084 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3085 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3086 gimple_assign_lhs (new_stmt
));
3088 tree clobber
= build_constructor (ratype
, NULL
);
3089 TREE_THIS_VOLATILE (clobber
) = 1;
3090 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3091 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3094 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3095 if ((j
& (k
- 1)) != k
- 1)
3097 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3099 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
),
3101 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3103 if ((unsigned) j
== k
- 1)
3104 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3106 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3108 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Array return of exactly matching width: load it through a MEM_REF
   and clobber the temporary.  */
3113 tree t
= build_fold_addr_expr (new_temp
);
3114 t
= build2 (MEM_REF
, vectype
, t
,
3115 build_int_cst (TREE_TYPE (t
), 0));
3117 = gimple_build_assign (make_ssa_name (vec_dest
, NULL
), t
);
3118 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3119 tree clobber
= build_constructor (ratype
, NULL
);
3120 TREE_THIS_VOLATILE (clobber
) = 1;
3121 vect_finish_stmt_generation (stmt
,
3122 gimple_build_assign (new_temp
,
3128 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3130 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3132 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3137 /* The call in STMT might prevent it from being removed in dce.
3138 We however cannot remove it here, due to the way the ssa name
3139 it defines is mapped to the new definition. So just replace
3140 rhs of the statement with something harmless. */
3147 type
= TREE_TYPE (scalar_dest
);
3148 if (is_pattern_stmt_p (stmt_info
))
3149 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3151 lhs
= gimple_call_lhs (stmt
);
3152 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3155 new_stmt
= gimple_build_nop ();
3156 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3157 set_vinfo_for_stmt (stmt
, NULL
);
3158 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3159 gsi_replace (gsi
, new_stmt
, false);
3160 unlink_stmt_vdef (stmt
);
3166 /* Function vect_gen_widened_results_half
3168 Create a vector stmt whose code, type, number of arguments, and result
3169 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3170 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3171 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3172 needs to be created (DECL is a function-decl of a target-builtin).
3173 STMT is the original scalar stmt that we are vectorizing. */
/* Emit one half of a widening operation: a call to DECL when CODE is
   CALL_EXPR, otherwise an assignment with CODE.  The result goes into
   a fresh SSA name based on VEC_DEST and the statement is inserted at
   GSI.  NOTE(review): the trailing `return new_stmt;` and braces were
   dropped by this extraction.  */
3176 vect_gen_widened_results_half (enum tree_code code
,
3178 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3179 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3185 /* Generate half of the widened result: */
3186 if (code
== CALL_EXPR
)
3188 /* Target specific support */
3189 if (op_type
== binary_op
)
3190 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3192 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3193 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3194 gimple_call_set_lhs (new_stmt
, new_temp
);
3198 /* Generic support */
3199 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3200 if (op_type
!= binary_op
)
3202 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
3204 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3205 gimple_assign_set_lhs (new_stmt
, new_temp
);
3207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3213 /* Get vectorized definitions for loop-based vectorization. For the first
3214 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3215 scalar operand), and for the rest we get a copy with
3216 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3217 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3218 The vectors are collected into VEC_OPRNDS. */
/* Collect vectorized definitions for loop-based vectorization into
   VEC_OPRNDS: the first def comes from the scalar operand, subsequent
   defs are copies chained through *OPRND; recurses MULTI_STEP_CVT
   times for multi-step conversions.  NOTE(review): braces and the
   update of *oprnd between steps were dropped by this extraction.  */
3221 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3222 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3226 /* Get first vector operand. */
3227 /* All the vector operands except the very first one (that is scalar oprnd)
3229 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3230 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3232 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3234 vec_oprnds
->quick_push (vec_oprnd
);
3236 /* Get second vector operand. */
3237 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3238 vec_oprnds
->quick_push (vec_oprnd
);
3242 /* For conversion in multiple steps, continue to get operands
3245 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3249 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3250 For multi-step conversions store the resulting vectors and call the function
/* Build demotion (narrowing) statements: pair up the operands in
   VEC_OPRNDS two at a time, combine each pair with CODE into a fresh
   SSA name, and either record the results (last step) or store them
   back and recurse with VEC_PACK_TRUNC_EXPR for multi-step
   conversions.  NOTE(review): braces, some declarations (i, vec_dsts
   parameter) and the final else-branch layout were dropped by this
   extraction.  */
3254 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3255 int multi_step_cvt
, gimple stmt
,
3257 gimple_stmt_iterator
*gsi
,
3258 slp_tree slp_node
, enum tree_code code
,
3259 stmt_vec_info
*prev_stmt_info
)
3262 tree vop0
, vop1
, new_tmp
, vec_dest
;
3264 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3266 vec_dest
= vec_dsts
.pop ();
3268 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3270 /* Create demotion operation. */
3271 vop0
= (*vec_oprnds
)[i
];
3272 vop1
= (*vec_oprnds
)[i
+ 1];
3273 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3274 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3275 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3276 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3279 /* Store the resulting vector for next recursive call. */
3280 (*vec_oprnds
)[i
/2] = new_tmp
;
3283 /* This is the last step of the conversion sequence. Store the
3284 vectors in SLP_NODE or in vector info of the scalar statement
3285 (or in STMT_VINFO_RELATED_STMT chain). */
3287 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3290 if (!*prev_stmt_info
)
3291 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3293 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3295 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3300 /* For multi-step demotion operations we first generate demotion operations
3301 from the source type to the intermediate types, and then combine the
3302 results (stored in VEC_OPRNDS) in demotion operation to the destination
3306 /* At each level of recursion we have half of the operands we had at the
3308 vec_oprnds
->truncate ((i
+1)/2);
3309 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3310 stmt
, vec_dsts
, gsi
, slp_node
,
3311 VEC_PACK_TRUNC_EXPR
,
3315 vec_dsts
.quick_push (vec_dest
);
3319 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3320 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3321 the resulting vectors and call the function recursively. */
/* Build promotion (widening) statements: for each operand in
   VEC_OPRNDS0 (with the matching VEC_OPRNDS1 element for binary ops)
   emit the low/high halves via vect_gen_widened_results_half with
   CODE1/CODE2 (or DECL1/DECL2 for builtin calls), collect both halves
   into VEC_TMP and hand it back through *VEC_OPRNDS0.
   NOTE(review): braces and the declaration of `i` were dropped by
   this extraction.  */
3324 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3325 vec
<tree
> *vec_oprnds1
,
3326 gimple stmt
, tree vec_dest
,
3327 gimple_stmt_iterator
*gsi
,
3328 enum tree_code code1
,
3329 enum tree_code code2
, tree decl1
,
3330 tree decl2
, int op_type
)
3333 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3334 gimple new_stmt1
, new_stmt2
;
3335 vec
<tree
> vec_tmp
= vNULL
;
3337 vec_tmp
.create (vec_oprnds0
->length () * 2);
3338 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3340 if (op_type
== binary_op
)
3341 vop1
= (*vec_oprnds1
)[i
];
3345 /* Generate the two halves of promotion operation. */
3346 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3347 op_type
, vec_dest
, gsi
, stmt
);
3348 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3349 op_type
, vec_dest
, gsi
, stmt
);
3350 if (is_gimple_call (new_stmt1
))
3352 new_tmp1
= gimple_call_lhs (new_stmt1
);
3353 new_tmp2
= gimple_call_lhs (new_stmt2
);
3357 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3358 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3361 /* Store the results for the next step. */
3362 vec_tmp
.quick_push (new_tmp1
);
3363 vec_tmp
.quick_push (new_tmp2
);
3366 vec_oprnds0
->release ();
3367 *vec_oprnds0
= vec_tmp
;
3371 /* Check if STMT performs a conversion operation, that can be vectorized.
3372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3373 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3377 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3378 gimple
*vec_stmt
, slp_tree slp_node
)
3382 tree op0
, op1
= NULL_TREE
;
3383 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3384 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3385 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3386 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3387 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3388 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3392 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3393 gimple new_stmt
= NULL
;
3394 stmt_vec_info prev_stmt_info
;
3397 tree vectype_out
, vectype_in
;
3399 tree lhs_type
, rhs_type
;
3400 enum { NARROW
, NONE
, WIDEN
} modifier
;
3401 vec
<tree
> vec_oprnds0
= vNULL
;
3402 vec
<tree
> vec_oprnds1
= vNULL
;
3404 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3405 int multi_step_cvt
= 0;
3406 vec
<tree
> vec_dsts
= vNULL
;
3407 vec
<tree
> interm_types
= vNULL
;
3408 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3410 enum machine_mode rhs_mode
;
3411 unsigned short fltsz
;
3413 /* Is STMT a vectorizable conversion? */
3415 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3418 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3421 if (!is_gimple_assign (stmt
))
3424 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3427 code
= gimple_assign_rhs_code (stmt
);
3428 if (!CONVERT_EXPR_CODE_P (code
)
3429 && code
!= FIX_TRUNC_EXPR
3430 && code
!= FLOAT_EXPR
3431 && code
!= WIDEN_MULT_EXPR
3432 && code
!= WIDEN_LSHIFT_EXPR
)
3435 op_type
= TREE_CODE_LENGTH (code
);
3437 /* Check types of lhs and rhs. */
3438 scalar_dest
= gimple_assign_lhs (stmt
);
3439 lhs_type
= TREE_TYPE (scalar_dest
);
3440 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3442 op0
= gimple_assign_rhs1 (stmt
);
3443 rhs_type
= TREE_TYPE (op0
);
3445 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3446 && !((INTEGRAL_TYPE_P (lhs_type
)
3447 && INTEGRAL_TYPE_P (rhs_type
))
3448 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3449 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3452 if ((INTEGRAL_TYPE_P (lhs_type
)
3453 && (TYPE_PRECISION (lhs_type
)
3454 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3455 || (INTEGRAL_TYPE_P (rhs_type
)
3456 && (TYPE_PRECISION (rhs_type
)
3457 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3459 if (dump_enabled_p ())
3460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3461 "type conversion to/from bit-precision unsupported."
3466 /* Check the operands of the operation. */
3467 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3468 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3470 if (dump_enabled_p ())
3471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3472 "use not simple.\n");
3475 if (op_type
== binary_op
)
3479 op1
= gimple_assign_rhs2 (stmt
);
3480 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3481 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3483 if (CONSTANT_CLASS_P (op0
))
3484 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3485 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3487 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3492 if (dump_enabled_p ())
3493 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3494 "use not simple.\n");
3499 /* If op0 is an external or constant defs use a vector type of
3500 the same size as the output vector type. */
3502 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3504 gcc_assert (vectype_in
);
3507 if (dump_enabled_p ())
3509 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3510 "no vectype for scalar type ");
3511 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3512 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3518 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3519 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3520 if (nunits_in
< nunits_out
)
3522 else if (nunits_out
== nunits_in
)
3527 /* Multiple types in SLP are handled by creating the appropriate number of
3528 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3530 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3532 else if (modifier
== NARROW
)
3533 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3535 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3537 /* Sanity check: make sure that at least one copy of the vectorized stmt
3538 needs to be generated. */
3539 gcc_assert (ncopies
>= 1);
3541 /* Supportable by target? */
3545 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3547 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3552 if (dump_enabled_p ())
3553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3554 "conversion not supported by target.\n");
3558 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3559 &code1
, &code2
, &multi_step_cvt
,
3562 /* Binary widening operation can only be supported directly by the
3564 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3568 if (code
!= FLOAT_EXPR
3569 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3570 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3573 rhs_mode
= TYPE_MODE (rhs_type
);
3574 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3575 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3576 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3577 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3580 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3581 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3582 if (cvt_type
== NULL_TREE
)
3585 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3587 if (!supportable_convert_operation (code
, vectype_out
,
3588 cvt_type
, &decl1
, &codecvt1
))
3591 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3592 cvt_type
, &codecvt1
,
3593 &codecvt2
, &multi_step_cvt
,
3597 gcc_assert (multi_step_cvt
== 0);
3599 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3600 vectype_in
, &code1
, &code2
,
3601 &multi_step_cvt
, &interm_types
))
3605 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3608 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3609 codecvt2
= ERROR_MARK
;
3613 interm_types
.safe_push (cvt_type
);
3614 cvt_type
= NULL_TREE
;
3619 gcc_assert (op_type
== unary_op
);
3620 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3621 &code1
, &multi_step_cvt
,
3625 if (code
!= FIX_TRUNC_EXPR
3626 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3627 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3630 rhs_mode
= TYPE_MODE (rhs_type
);
3632 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3633 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3634 if (cvt_type
== NULL_TREE
)
3636 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3639 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3640 &code1
, &multi_step_cvt
,
3649 if (!vec_stmt
) /* transformation not required. */
3651 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_NOTE
, vect_location
,
3653 "=== vectorizable_conversion ===\n");
3654 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3656 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3657 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3659 else if (modifier
== NARROW
)
3661 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3662 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3666 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3667 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3669 interm_types
.release ();
3674 if (dump_enabled_p ())
3675 dump_printf_loc (MSG_NOTE
, vect_location
,
3676 "transform conversion. ncopies = %d.\n", ncopies
);
3678 if (op_type
== binary_op
)
3680 if (CONSTANT_CLASS_P (op0
))
3681 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3682 else if (CONSTANT_CLASS_P (op1
))
3683 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3686 /* In case of multi-step conversion, we first generate conversion operations
3687 to the intermediate types, and then from that types to the final one.
3688 We create vector destinations for the intermediate type (TYPES) received
3689 from supportable_*_operation, and store them in the correct order
3690 for future use in vect_create_vectorized_*_stmts (). */
3691 vec_dsts
.create (multi_step_cvt
+ 1);
3692 vec_dest
= vect_create_destination_var (scalar_dest
,
3693 (cvt_type
&& modifier
== WIDEN
)
3694 ? cvt_type
: vectype_out
);
3695 vec_dsts
.quick_push (vec_dest
);
3699 for (i
= interm_types
.length () - 1;
3700 interm_types
.iterate (i
, &intermediate_type
); i
--)
3702 vec_dest
= vect_create_destination_var (scalar_dest
,
3704 vec_dsts
.quick_push (vec_dest
);
3709 vec_dest
= vect_create_destination_var (scalar_dest
,
3711 ? vectype_out
: cvt_type
);
3715 if (modifier
== WIDEN
)
3717 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3718 if (op_type
== binary_op
)
3719 vec_oprnds1
.create (1);
3721 else if (modifier
== NARROW
)
3722 vec_oprnds0
.create (
3723 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3725 else if (code
== WIDEN_LSHIFT_EXPR
)
3726 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3729 prev_stmt_info
= NULL
;
3733 for (j
= 0; j
< ncopies
; j
++)
3736 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3739 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3741 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3743 /* Arguments are ready, create the new vector stmt. */
3744 if (code1
== CALL_EXPR
)
3746 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3747 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3748 gimple_call_set_lhs (new_stmt
, new_temp
);
3752 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3753 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
3755 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3756 gimple_assign_set_lhs (new_stmt
, new_temp
);
3759 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3761 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3765 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3767 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3768 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3773 /* In case the vectorization factor (VF) is bigger than the number
3774 of elements that we can fit in a vectype (nunits), we have to
3775 generate more than one vector stmt - i.e - we need to "unroll"
3776 the vector stmt by a factor VF/nunits. */
3777 for (j
= 0; j
< ncopies
; j
++)
3784 if (code
== WIDEN_LSHIFT_EXPR
)
3789 /* Store vec_oprnd1 for every vector stmt to be created
3790 for SLP_NODE. We check during the analysis that all
3791 the shift arguments are the same. */
3792 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3793 vec_oprnds1
.quick_push (vec_oprnd1
);
3795 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3799 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3800 &vec_oprnds1
, slp_node
, -1);
3804 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3805 vec_oprnds0
.quick_push (vec_oprnd0
);
3806 if (op_type
== binary_op
)
3808 if (code
== WIDEN_LSHIFT_EXPR
)
3811 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3813 vec_oprnds1
.quick_push (vec_oprnd1
);
3819 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3820 vec_oprnds0
.truncate (0);
3821 vec_oprnds0
.quick_push (vec_oprnd0
);
3822 if (op_type
== binary_op
)
3824 if (code
== WIDEN_LSHIFT_EXPR
)
3827 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3829 vec_oprnds1
.truncate (0);
3830 vec_oprnds1
.quick_push (vec_oprnd1
);
3834 /* Arguments are ready. Create the new vector stmts. */
3835 for (i
= multi_step_cvt
; i
>= 0; i
--)
3837 tree this_dest
= vec_dsts
[i
];
3838 enum tree_code c1
= code1
, c2
= code2
;
3839 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3844 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3846 stmt
, this_dest
, gsi
,
3847 c1
, c2
, decl1
, decl2
,
3851 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3855 if (codecvt1
== CALL_EXPR
)
3857 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3858 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3859 gimple_call_set_lhs (new_stmt
, new_temp
);
3863 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3864 new_temp
= make_ssa_name (vec_dest
, NULL
);
3865 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
3870 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3873 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3876 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3879 if (!prev_stmt_info
)
3880 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3882 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3883 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3888 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3892 /* In case the vectorization factor (VF) is bigger than the number
3893 of elements that we can fit in a vectype (nunits), we have to
3894 generate more than one vector stmt - i.e - we need to "unroll"
3895 the vector stmt by a factor VF/nunits. */
3896 for (j
= 0; j
< ncopies
; j
++)
3900 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3904 vec_oprnds0
.truncate (0);
3905 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3906 vect_pow2 (multi_step_cvt
) - 1);
3909 /* Arguments are ready. Create the new vector stmts. */
3911 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3913 if (codecvt1
== CALL_EXPR
)
3915 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3916 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3917 gimple_call_set_lhs (new_stmt
, new_temp
);
3921 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3922 new_temp
= make_ssa_name (vec_dest
, NULL
);
3923 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
3927 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3928 vec_oprnds0
[i
] = new_temp
;
3931 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
3932 stmt
, vec_dsts
, gsi
,
3937 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3941 vec_oprnds0
.release ();
3942 vec_oprnds1
.release ();
3943 vec_dsts
.release ();
3944 interm_types
.release ();
3950 /* Function vectorizable_assignment.
3952 Check if STMT performs an assignment (copy) that can be vectorized.
3953 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3954 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3955 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this span appears to be a corrupted extraction of GCC's
   tree-vect-stmts.c -- logical lines are split across display lines and
   the embedded original line numbers jump (e.g. 3985 -> 3988), so
   statements, braces and returns have been dropped.  Do not treat this
   text as compilable; recover the full function from the upstream file
   before editing.  Comments below annotate only what is visible.  */
3958 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3959 gimple
*vec_stmt
, slp_tree slp_node
)
/* Local state: the stmt_vec_info of STMT, its vector type, and the
   enclosing loop/bb vectorization contexts.  */
3964 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
)
;
3965 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3966 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3970 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3971 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3974 vec
<tree
> vec_oprnds
= vNULL
;
3976 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3977 gimple new_stmt
= NULL
;
3978 stmt_vec_info prev_stmt_info
= NULL
;
3979 enum tree_code code
;
3982 /* Multiple types in SLP are handled by creating the appropriate number of
3983 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3985 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
/* NOTE(review): the "ncopies = 1" arm for the SLP case was presumably
   dropped by the extraction (original lines 3986-3987 are missing) --
   confirm against upstream.  */
3988 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3990 gcc_assert (ncopies
>= 1);
/* Early bail-outs: stmt must be relevant (or in a bb-vectorization
   region) and an internal def.  The return statements are missing from
   this extraction.  */
3992 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3995 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3998 /* Is vectorizable assignment? */
3999 if (!is_gimple_assign (stmt
))
4002 scalar_dest
= gimple_assign_lhs (stmt
);
4003 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4006 code
= gimple_assign_rhs_code (stmt
);
4007 if (gimple_assign_single_p (stmt
)
4008 || code
== PAREN_EXPR
4009 || CONVERT_EXPR_CODE_P (code
))
4010 op
= gimple_assign_rhs1 (stmt
);
4014 if (code
== VIEW_CONVERT_EXPR
)
4015 op
= TREE_OPERAND (op
, 0);
/* Analyze the (single) source operand of the copy/conversion.  */
4017 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4018 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4020 if (dump_enabled_p ())
4021 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4022 "use not simple.\n");
4026 /* We can handle NOP_EXPR conversions that do not change the number
4027 of elements or the vector size. */
4028 if ((CONVERT_EXPR_CODE_P (code
)
4029 || code
== VIEW_CONVERT_EXPR
)
4031 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4032 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4033 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4036 /* We do not handle bit-precision changes. */
4037 if ((CONVERT_EXPR_CODE_P (code
)
4038 || code
== VIEW_CONVERT_EXPR
)
4039 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4040 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4041 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4042 || ((TYPE_PRECISION (TREE_TYPE (op
))
4043 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4044 /* But a conversion that does not change the bit-pattern is ok. */
4045 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4046 > TYPE_PRECISION (TREE_TYPE (op
)))
4047 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4049 if (dump_enabled_p ())
4050 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4051 "type conversion to/from bit-precision "
/* Analysis-only path: record the stmt kind and its cost model entry,
   but generate no code.  */
4056 if (!vec_stmt
) /* transformation not required. */
4058 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4059 if (dump_enabled_p ())
4060 dump_printf_loc (MSG_NOTE
, vect_location
,
4061 "=== vectorizable_assignment ===\n");
4062 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation path: emit NCOPIES vector copies, chained via
   STMT_VINFO_RELATED_STMT.  */
4067 if (dump_enabled_p ())
4068 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4071 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4074 for (j
= 0; j
< ncopies
; j
++)
4078 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4080 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4082 /* Arguments are ready. create the new vector stmt. */
4083 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4085 if (CONVERT_EXPR_CODE_P (code
)
4086 || code
== VIEW_CONVERT_EXPR
)
/* Conversions are expressed as a VIEW_CONVERT_EXPR on the vector:
   same bits, reinterpreted in the destination vector type.  */
4087 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4088 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4089 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4090 gimple_assign_set_lhs (new_stmt
, new_temp
);
4091 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4093 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4100 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4102 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4104 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4107 vec_oprnds
.release ();
4112 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4113 either as shift by a scalar or by a vector. */
/* NOTE(review): fragment of a corrupted extraction -- the original line
   numbers jump (4119 -> 4124, 4128 -> 4130), so the guards between the
   optab queries (and the final returns) are missing.  The visible logic
   tries the vector-by-scalar optab first, then falls back to the
   vector-by-vector optab, and finally checks for a concrete insn code.
   Recover the full function from upstream before editing.  */
4116 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4119 enum machine_mode vec_mode
;
4124 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* First preference: a shift insn taking a scalar shift amount.  */
4128 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4130 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
/* Fallback: a shift insn taking a vector of shift amounts.  */
4132 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4134 || (optab_handler (optab
, TYPE_MODE (vectype
))
4135 == CODE_FOR_nothing
))
/* Final check: the chosen optab must map to an actual insn for the
   vector mode.  */
4139 vec_mode
= TYPE_MODE (vectype
);
4140 icode
= (int) optab_handler (optab
, vec_mode
);
4141 if (icode
== CODE_FOR_nothing
)
4148 /* Function vectorizable_shift.
4150 Check if STMT performs a shift operation that can be vectorized.
4151 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4152 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4153 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): corrupted extraction -- display lines are split
   mid-statement and many original lines were dropped (line numbers
   jump, e.g. 4227 -> 4230, 4365 -> 4368), so returns, braces and some
   conditions are missing.  Not compilable as shown; comments below
   annotate only what is visible.  */
4156 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4157 gimple
*vec_stmt
, slp_tree slp_node
)
4161 tree op0
, op1
= NULL
;
4162 tree vec_oprnd1
= NULL_TREE
;
4163 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4165 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4166 enum tree_code code
;
4167 enum machine_mode vec_mode
;
4171 enum machine_mode optab_op2_mode
;
4174 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4175 gimple new_stmt
= NULL
;
4176 stmt_vec_info prev_stmt_info
;
4183 vec
<tree
> vec_oprnds0
= vNULL
;
4184 vec
<tree
> vec_oprnds1
= vNULL
;
/* Assume the shift amount is a loop-invariant scalar until the operand
   analysis below proves otherwise.  */
4187 bool scalar_shift_arg
= true;
4188 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Early bail-outs (returns dropped by the extraction): stmt must be
   relevant, an internal def, a GIMPLE assignment with an SSA lhs, and a
   shift/rotate.  */
4191 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4194 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4197 /* Is STMT a vectorizable binary/unary operation? */
4198 if (!is_gimple_assign (stmt
))
4201 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4204 code
= gimple_assign_rhs_code (stmt
);
4206 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4207 || code
== RROTATE_EXPR
))
4210 scalar_dest
= gimple_assign_lhs (stmt
);
4211 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4212 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4213 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4215 if (dump_enabled_p ())
4216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4217 "bit-precision shifts not supported.\n");
/* Analyze operand 0 (the shifted value).  */
4221 op0
= gimple_assign_rhs1 (stmt
);
4222 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4223 &def_stmt
, &def
, &dt
[0], &vectype
))
4225 if (dump_enabled_p ())
4226 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4227 "use not simple.\n");
4230 /* If op0 is an external or constant def use a vector type with
4231 the same size as the output vector type. */
4233 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4235 gcc_assert (vectype
);
4238 if (dump_enabled_p ())
4239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4240 "no vectype for scalar type\n");
/* Input and output vectors must hold the same number of elements.  */
4244 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4245 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4246 if (nunits_out
!= nunits_in
)
/* Analyze operand 1 (the shift amount).  */
4249 op1
= gimple_assign_rhs2 (stmt
);
4250 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4251 &def
, &dt
[1], &op1_vectype
))
4253 if (dump_enabled_p ())
4254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4255 "use not simple.\n");
4260 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4264 /* Multiple types in SLP are handled by creating the appropriate number of
4265 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4267 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4270 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4272 gcc_assert (ncopies
>= 1);
4274 /* Determine whether the shift amount is a vector, or scalar. If the
4275 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4277 if (dt
[1] == vect_internal_def
&& !slp_node
)
4278 scalar_shift_arg
= false;
4279 else if (dt
[1] == vect_constant_def
4280 || dt
[1] == vect_external_def
4281 || dt
[1] == vect_internal_def
)
4283 /* In SLP, need to check whether the shift count is the same,
4284 in loops if it is a constant or invariant, it is always
4288 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4291 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4292 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4293 scalar_shift_arg
= false;
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4300 "operand mode requires invariant argument.\n");
4304 /* Vector shifted by vector. */
4305 if (!scalar_shift_arg
)
4307 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE
, vect_location
,
4310 "vector/vector shift/rotate found.\n");
4313 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4314 if (op1_vectype
== NULL_TREE
4315 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4319 "unusable type for last operand in"
4320 " vector/vector shift/rotate.\n");
4324 /* See if the machine has a vector shifted by scalar insn and if not
4325 then see if it has a vector shifted by vector insn. */
4328 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4330 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4332 if (dump_enabled_p ())
4333 dump_printf_loc (MSG_NOTE
, vect_location
,
4334 "vector/scalar shift/rotate found.\n");
/* No vector/scalar insn: fall back to the vector/vector optab and
   remember that the scalar amount will need to be broadcast.  */
4338 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4340 && (optab_handler (optab
, TYPE_MODE (vectype
))
4341 != CODE_FOR_nothing
))
4343 scalar_shift_arg
= false;
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE
, vect_location
,
4347 "vector/vector shift/rotate found.\n");
4349 /* Unlike the other binary operators, shifts/rotates have
4350 the rhs being int, instead of the same type as the lhs,
4351 so make sure the scalar is the right type if we are
4352 dealing with vectors of long long/long/short/char. */
4353 if (dt
[1] == vect_constant_def
)
4354 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4355 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4359 && TYPE_MODE (TREE_TYPE (vectype
))
!= TYPE_MODE (TREE_TYPE (op1
)))
4362 if (dump_enabled_p ())
4363 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4364 "unusable type for last operand in"
4365 " vector/vector shift/rotate.\n");
4368 if (vec_stmt
&& !slp_node
)
4370 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
/* Materialize the invariant amount as a vector def up front.  */
4371 op1
= vect_init_vector (stmt
, op1
,
4372 TREE_TYPE (vectype
), NULL
);
4379 /* Supportable by target? */
4382 if (dump_enabled_p ())
4383 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4387 vec_mode
= TYPE_MODE (vectype
);
4388 icode
= (int) optab_handler (optab
, vec_mode
);
4389 if (icode
== CODE_FOR_nothing
)
4391 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4393 "op not supported by target.\n");
4394 /* Check only during analysis. */
4395 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4396 || (vf
< vect_min_worthwhile_factor (code
)
4399 if (dump_enabled_p ())
4400 dump_printf_loc (MSG_NOTE
, vect_location
,
4401 "proceeding using word mode.\n");
4404 /* Worthwhile without SIMD support? Check only during analysis. */
4405 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4406 && vf
< vect_min_worthwhile_factor (code
)
4409 if (dump_enabled_p ())
4410 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4411 "not worthwhile without SIMD support.\n");
/* Analysis-only path: record the stmt kind and cost, generate nothing.  */
4415 if (!vec_stmt
) /* transformation not required. */
4417 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4418 if (dump_enabled_p ())
4419 dump_printf_loc (MSG_NOTE
, vect_location
,
4420 "=== vectorizable_shift ===\n");
4421 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation path: emit NCOPIES vector shift stmts, chained via
   STMT_VINFO_RELATED_STMT.  */
4427 if (dump_enabled_p ())
4428 dump_printf_loc (MSG_NOTE
, vect_location
,
4429 "transform binary/unary operation.\n");
4432 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4434 prev_stmt_info
= NULL
;
4435 for (j
= 0; j
< ncopies
; j
++)
4440 if (scalar_shift_arg
)
4442 /* Vector shl and shr insn patterns can be defined with scalar
4443 operand 2 (shift operand). In this case, use constant or loop
4444 invariant op1 directly, without extending it to vector mode
4446 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4447 if (!VECTOR_MODE_P (optab_op2_mode
))
4449 if (dump_enabled_p ())
4450 dump_printf_loc (MSG_NOTE
, vect_location
,
4451 "operand 1 using scalar mode.\n");
4453 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4454 vec_oprnds1
.quick_push (vec_oprnd1
);
4457 /* Store vec_oprnd1 for every vector stmt to be created
4458 for SLP_NODE. We check during the analysis that all
4459 the shift arguments are the same.
4460 TODO: Allow different constants for different vector
4461 stmts generated for an SLP instance. */
4462 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4463 vec_oprnds1
.quick_push (vec_oprnd1
);
4468 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4469 (a special case for certain kind of vector shifts); otherwise,
4470 operand 1 should be of a vector type (the usual case). */
4472 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4475 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4479 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4481 /* Arguments are ready. Create the new vector stmt. */
4482 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4484 vop1
= vec_oprnds1
[i
];
4485 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
4486 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4487 gimple_assign_set_lhs (new_stmt
, new_temp
);
4488 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4490 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4497 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4499 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4500 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4503 vec_oprnds0
.release ();
4504 vec_oprnds1
.release ();
4510 /* Function vectorizable_operation.
4512 Check if STMT performs a binary, unary or ternary operation that can
4514 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4515 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4516 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4519 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4520 gimple
*vec_stmt
, slp_tree slp_node
)
4524 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4525 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4527 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4528 enum tree_code code
;
4529 enum machine_mode vec_mode
;
4536 enum vect_def_type dt
[3]
4537 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4538 gimple new_stmt
= NULL
;
4539 stmt_vec_info prev_stmt_info
;
4545 vec
<tree
> vec_oprnds0
= vNULL
;
4546 vec
<tree
> vec_oprnds1
= vNULL
;
4547 vec
<tree
> vec_oprnds2
= vNULL
;
4548 tree vop0
, vop1
, vop2
;
4549 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4552 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4555 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4558 /* Is STMT a vectorizable binary/unary operation? */
4559 if (!is_gimple_assign (stmt
))
4562 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4565 code
= gimple_assign_rhs_code (stmt
);
4567 /* For pointer addition, we should use the normal plus for
4568 the vector addition. */
4569 if (code
== POINTER_PLUS_EXPR
)
4572 /* Support only unary or binary operations. */
4573 op_type
= TREE_CODE_LENGTH (code
);
4574 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4576 if (dump_enabled_p ())
4577 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4578 "num. args = %d (not unary/binary/ternary op).\n",
4583 scalar_dest
= gimple_assign_lhs (stmt
);
4584 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4586 /* Most operations cannot handle bit-precision types without extra
4588 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4589 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4590 /* Exception are bitwise binary operations. */
4591 && code
!= BIT_IOR_EXPR
4592 && code
!= BIT_XOR_EXPR
4593 && code
!= BIT_AND_EXPR
)
4595 if (dump_enabled_p ())
4596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4597 "bit-precision arithmetic not supported.\n");
4601 op0
= gimple_assign_rhs1 (stmt
);
4602 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4603 &def_stmt
, &def
, &dt
[0], &vectype
))
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4607 "use not simple.\n");
4610 /* If op0 is an external or constant def use a vector type with
4611 the same size as the output vector type. */
4613 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4615 gcc_assert (vectype
);
4618 if (dump_enabled_p ())
4620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4621 "no vectype for scalar type ");
4622 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4624 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4630 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4631 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4632 if (nunits_out
!= nunits_in
)
4635 if (op_type
== binary_op
|| op_type
== ternary_op
)
4637 op1
= gimple_assign_rhs2 (stmt
);
4638 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4641 if (dump_enabled_p ())
4642 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4643 "use not simple.\n");
4647 if (op_type
== ternary_op
)
4649 op2
= gimple_assign_rhs3 (stmt
);
4650 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4653 if (dump_enabled_p ())
4654 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4655 "use not simple.\n");
4661 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4665 /* Multiple types in SLP are handled by creating the appropriate number of
4666 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4668 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4671 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4673 gcc_assert (ncopies
>= 1);
4675 /* Shifts are handled in vectorizable_shift (). */
4676 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4677 || code
== RROTATE_EXPR
)
4680 /* Supportable by target? */
4682 vec_mode
= TYPE_MODE (vectype
);
4683 if (code
== MULT_HIGHPART_EXPR
)
4685 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4686 icode
= LAST_INSN_CODE
;
4688 icode
= CODE_FOR_nothing
;
4692 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4700 icode
= (int) optab_handler (optab
, vec_mode
);
4703 if (icode
== CODE_FOR_nothing
)
4705 if (dump_enabled_p ())
4706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4707 "op not supported by target.\n");
4708 /* Check only during analysis. */
4709 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4710 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4712 if (dump_enabled_p ())
4713 dump_printf_loc (MSG_NOTE
, vect_location
,
4714 "proceeding using word mode.\n");
4717 /* Worthwhile without SIMD support? Check only during analysis. */
4718 if (!VECTOR_MODE_P (vec_mode
)
4720 && vf
< vect_min_worthwhile_factor (code
))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4724 "not worthwhile without SIMD support.\n");
4728 if (!vec_stmt
) /* transformation not required. */
4730 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_NOTE
, vect_location
,
4733 "=== vectorizable_operation ===\n");
4734 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_NOTE
, vect_location
,
4742 "transform binary/unary operation.\n");
4745 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4747 /* In case the vectorization factor (VF) is bigger than the number
4748 of elements that we can fit in a vectype (nunits), we have to generate
4749 more than one vector stmt - i.e - we need to "unroll" the
4750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4751 from one copy of the vector stmt to the next, in the field
4752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4753 stages to find the correct vector defs to be used when vectorizing
4754 stmts that use the defs of the current stmt. The example below
4755 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4756 we need to create 4 vectorized stmts):
4758 before vectorization:
4759 RELATED_STMT VEC_STMT
4763 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4765 RELATED_STMT VEC_STMT
4766 VS1_0: vx0 = memref0 VS1_1 -
4767 VS1_1: vx1 = memref1 VS1_2 -
4768 VS1_2: vx2 = memref2 VS1_3 -
4769 VS1_3: vx3 = memref3 - -
4770 S1: x = load - VS1_0
4773 step2: vectorize stmt S2 (done here):
4774 To vectorize stmt S2 we first need to find the relevant vector
4775 def for the first operand 'x'. This is, as usual, obtained from
4776 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4777 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4778 relevant vector def 'vx0'. Having found 'vx0' we can generate
4779 the vector stmt VS2_0, and as usual, record it in the
4780 STMT_VINFO_VEC_STMT of stmt S2.
4781 When creating the second copy (VS2_1), we obtain the relevant vector
4782 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4783 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4784 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4785 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4786 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4787 chain of stmts and pointers:
4788 RELATED_STMT VEC_STMT
4789 VS1_0: vx0 = memref0 VS1_1 -
4790 VS1_1: vx1 = memref1 VS1_2 -
4791 VS1_2: vx2 = memref2 VS1_3 -
4792 VS1_3: vx3 = memref3 - -
4793 S1: x = load - VS1_0
4794 VS2_0: vz0 = vx0 + v1 VS2_1 -
4795 VS2_1: vz1 = vx1 + v1 VS2_2 -
4796 VS2_2: vz2 = vx2 + v1 VS2_3 -
4797 VS2_3: vz3 = vx3 + v1 - -
4798 S2: z = x + 1 - VS2_0 */
4800 prev_stmt_info
= NULL
;
4801 for (j
= 0; j
< ncopies
; j
++)
4806 if (op_type
== binary_op
|| op_type
== ternary_op
)
4807 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4810 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4812 if (op_type
== ternary_op
)
4814 vec_oprnds2
.create (1);
4815 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4822 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4823 if (op_type
== ternary_op
)
4825 tree vec_oprnd
= vec_oprnds2
.pop ();
4826 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4831 /* Arguments are ready. Create the new vector stmt. */
4832 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4834 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4835 ? vec_oprnds1
[i
] : NULL_TREE
);
4836 vop2
= ((op_type
== ternary_op
)
4837 ? vec_oprnds2
[i
] : NULL_TREE
);
4838 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
4840 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4841 gimple_assign_set_lhs (new_stmt
, new_temp
);
4842 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4844 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4851 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4853 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4854 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4857 vec_oprnds0
.release ();
4858 vec_oprnds1
.release ();
4859 vec_oprnds2
.release ();
4864 /* A helper function to ensure data reference DR's base alignment
4868 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4873 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4875 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4876 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4878 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4879 DECL_USER_ALIGN (base_decl
) = 1;
4880 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
4885 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4886 reversal of the vector elements. If that is impossible to do,
4890 perm_mask_for_reverse (tree vectype
)
4895 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4896 sel
= XALLOCAVEC (unsigned char, nunits
);
4898 for (i
= 0; i
< nunits
; ++i
)
4899 sel
[i
] = nunits
- 1 - i
;
4901 return vect_gen_perm_mask (vectype
, sel
);
4904 /* Function vectorizable_store.
4906 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4908 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4909 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4910 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4913 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4919 tree vec_oprnd
= NULL_TREE
;
4920 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4921 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4922 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4924 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4925 struct loop
*loop
= NULL
;
4926 enum machine_mode vec_mode
;
4928 enum dr_alignment_support alignment_support_scheme
;
4931 enum vect_def_type dt
;
4932 stmt_vec_info prev_stmt_info
= NULL
;
4933 tree dataref_ptr
= NULL_TREE
;
4934 tree dataref_offset
= NULL_TREE
;
4935 gimple ptr_incr
= NULL
;
4936 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4939 gimple next_stmt
, first_stmt
= NULL
;
4940 bool grouped_store
= false;
4941 bool store_lanes_p
= false;
4942 unsigned int group_size
, i
;
4943 vec
<tree
> dr_chain
= vNULL
;
4944 vec
<tree
> oprnds
= vNULL
;
4945 vec
<tree
> result_chain
= vNULL
;
4947 bool negative
= false;
4948 tree offset
= NULL_TREE
;
4949 vec
<tree
> vec_oprnds
= vNULL
;
4950 bool slp
= (slp_node
!= NULL
);
4951 unsigned int vec_num
;
4952 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4956 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4958 /* Multiple types in SLP are handled by creating the appropriate number of
4959 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4961 if (slp
|| PURE_SLP_STMT (stmt_info
))
4964 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4966 gcc_assert (ncopies
>= 1);
4968 /* FORNOW. This restriction should be relaxed. */
4969 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
4971 if (dump_enabled_p ())
4972 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4973 "multiple types in nested loop.\n");
4977 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4980 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4983 /* Is vectorizable store? */
4985 if (!is_gimple_assign (stmt
))
4988 scalar_dest
= gimple_assign_lhs (stmt
);
4989 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
4990 && is_pattern_stmt_p (stmt_info
))
4991 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
4992 if (TREE_CODE (scalar_dest
) != ARRAY_REF
4993 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
4994 && TREE_CODE (scalar_dest
) != INDIRECT_REF
4995 && TREE_CODE (scalar_dest
) != COMPONENT_REF
4996 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
4997 && TREE_CODE (scalar_dest
) != REALPART_EXPR
4998 && TREE_CODE (scalar_dest
) != MEM_REF
)
5001 gcc_assert (gimple_assign_single_p (stmt
));
5002 op
= gimple_assign_rhs1 (stmt
);
5003 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5008 "use not simple.\n");
5012 elem_type
= TREE_TYPE (vectype
);
5013 vec_mode
= TYPE_MODE (vectype
);
5015 /* FORNOW. In some cases can vectorize even if data-type not supported
5016 (e.g. - array initialization with 0). */
5017 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5020 if (!STMT_VINFO_DATA_REF (stmt_info
))
5024 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5025 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5026 size_zero_node
) < 0;
5027 if (negative
&& ncopies
> 1)
5029 if (dump_enabled_p ())
5030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5031 "multiple types with negative step.\n");
5037 gcc_assert (!grouped_store
);
5038 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5039 if (alignment_support_scheme
!= dr_aligned
5040 && alignment_support_scheme
!= dr_unaligned_supported
)
5042 if (dump_enabled_p ())
5043 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5044 "negative step but alignment required.\n");
5047 if (dt
!= vect_constant_def
5048 && dt
!= vect_external_def
5049 && !perm_mask_for_reverse (vectype
))
5051 if (dump_enabled_p ())
5052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5053 "negative step and reversing not supported.\n");
5058 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5060 grouped_store
= true;
5061 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5062 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5064 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5065 if (vect_store_lanes_supported (vectype
, group_size
))
5066 store_lanes_p
= true;
5067 else if (!vect_grouped_store_supported (vectype
, group_size
))
5071 if (first_stmt
== stmt
)
5073 /* STMT is the leader of the group. Check the operands of all the
5074 stmts of the group. */
5075 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5078 gcc_assert (gimple_assign_single_p (next_stmt
));
5079 op
= gimple_assign_rhs1 (next_stmt
);
5080 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5081 &def_stmt
, &def
, &dt
))
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5085 "use not simple.\n");
5088 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5093 if (!vec_stmt
) /* transformation not required. */
5095 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5096 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5103 ensure_base_align (stmt_info
, dr
);
5107 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5108 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5110 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5113 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5115 /* We vectorize all the stmts of the interleaving group when we
5116 reach the last stmt in the group. */
5117 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5118 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5127 grouped_store
= false;
5128 /* VEC_NUM is the number of vect stmts to be created for this
5130 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5131 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5132 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5133 op
= gimple_assign_rhs1 (first_stmt
);
5136 /* VEC_NUM is the number of vect stmts to be created for this
5138 vec_num
= group_size
;
5144 group_size
= vec_num
= 1;
5147 if (dump_enabled_p ())
5148 dump_printf_loc (MSG_NOTE
, vect_location
,
5149 "transform store. ncopies = %d\n", ncopies
);
5151 dr_chain
.create (group_size
);
5152 oprnds
.create (group_size
);
5154 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5155 gcc_assert (alignment_support_scheme
);
5156 /* Targets with store-lane instructions must not require explicit
5158 gcc_assert (!store_lanes_p
5159 || alignment_support_scheme
== dr_aligned
5160 || alignment_support_scheme
== dr_unaligned_supported
);
5163 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5166 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5168 aggr_type
= vectype
;
5170 /* In case the vectorization factor (VF) is bigger than the number
5171 of elements that we can fit in a vectype (nunits), we have to generate
5172 more than one vector stmt - i.e - we need to "unroll" the
5173 vector stmt by a factor VF/nunits. For more details see documentation in
5174 vect_get_vec_def_for_copy_stmt. */
5176 /* In case of interleaving (non-unit grouped access):
5183 We create vectorized stores starting from base address (the access of the
5184 first stmt in the chain (S2 in the above example), when the last store stmt
5185 of the chain (S4) is reached:
5188 VS2: &base + vec_size*1 = vx0
5189 VS3: &base + vec_size*2 = vx1
5190 VS4: &base + vec_size*3 = vx3
5192 Then permutation statements are generated:
5194 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5195 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5198 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5199 (the order of the data-refs in the output of vect_permute_store_chain
5200 corresponds to the order of scalar stmts in the interleaving chain - see
5201 the documentation of vect_permute_store_chain()).
5203 In case of both multiple types and interleaving, above vector stores and
5204 permutation stmts are created for every copy. The result vector stmts are
5205 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5206 STMT_VINFO_RELATED_STMT for the next copies.
5209 prev_stmt_info
= NULL
;
5210 for (j
= 0; j
< ncopies
; j
++)
5218 /* Get vectorized arguments for SLP_NODE. */
5219 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5220 NULL
, slp_node
, -1);
5222 vec_oprnd
= vec_oprnds
[0];
5226 /* For interleaved stores we collect vectorized defs for all the
5227 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5228 used as an input to vect_permute_store_chain(), and OPRNDS as
5229 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5231 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5232 OPRNDS are of size 1. */
5233 next_stmt
= first_stmt
;
5234 for (i
= 0; i
< group_size
; i
++)
5236 /* Since gaps are not supported for interleaved stores,
5237 GROUP_SIZE is the exact number of stmts in the chain.
5238 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5239 there is no interleaving, GROUP_SIZE is 1, and only one
5240 iteration of the loop will be executed. */
5241 gcc_assert (next_stmt
5242 && gimple_assign_single_p (next_stmt
));
5243 op
= gimple_assign_rhs1 (next_stmt
);
5245 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5247 dr_chain
.quick_push (vec_oprnd
);
5248 oprnds
.quick_push (vec_oprnd
);
5249 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5253 /* We should have catched mismatched types earlier. */
5254 gcc_assert (useless_type_conversion_p (vectype
,
5255 TREE_TYPE (vec_oprnd
)));
5256 bool simd_lane_access_p
5257 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5258 if (simd_lane_access_p
5259 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5260 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5261 && integer_zerop (DR_OFFSET (first_dr
))
5262 && integer_zerop (DR_INIT (first_dr
))
5263 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5264 get_alias_set (DR_REF (first_dr
))))
5266 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5267 dataref_offset
= build_int_cst (reference_alias_ptr_type
5268 (DR_REF (first_dr
)), 0);
5273 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5274 simd_lane_access_p
? loop
: NULL
,
5275 offset
, &dummy
, gsi
, &ptr_incr
,
5276 simd_lane_access_p
, &inv_p
);
5277 gcc_assert (bb_vinfo
|| !inv_p
);
5281 /* For interleaved stores we created vectorized defs for all the
5282 defs stored in OPRNDS in the previous iteration (previous copy).
5283 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5284 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5286 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5287 OPRNDS are of size 1. */
5288 for (i
= 0; i
< group_size
; i
++)
5291 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5293 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5294 dr_chain
[i
] = vec_oprnd
;
5295 oprnds
[i
] = vec_oprnd
;
5299 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5300 TYPE_SIZE_UNIT (aggr_type
));
5302 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5303 TYPE_SIZE_UNIT (aggr_type
));
5310 /* Combine all the vectors into an array. */
5311 vec_array
= create_vector_array (vectype
, vec_num
);
5312 for (i
= 0; i
< vec_num
; i
++)
5314 vec_oprnd
= dr_chain
[i
];
5315 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5319 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5320 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5321 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5322 gimple_call_set_lhs (new_stmt
, data_ref
);
5323 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5331 result_chain
.create (group_size
);
5333 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5337 next_stmt
= first_stmt
;
5338 for (i
= 0; i
< vec_num
; i
++)
5340 unsigned align
, misalign
;
5343 /* Bump the vector pointer. */
5344 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5348 vec_oprnd
= vec_oprnds
[i
];
5349 else if (grouped_store
)
5350 /* For grouped stores vectorized defs are interleaved in
5351 vect_permute_store_chain(). */
5352 vec_oprnd
= result_chain
[i
];
5354 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5357 : build_int_cst (reference_alias_ptr_type
5358 (DR_REF (first_dr
)), 0));
5359 align
= TYPE_ALIGN_UNIT (vectype
);
5360 if (aligned_access_p (first_dr
))
5362 else if (DR_MISALIGNMENT (first_dr
) == -1)
5364 TREE_TYPE (data_ref
)
5365 = build_aligned_type (TREE_TYPE (data_ref
),
5366 TYPE_ALIGN (elem_type
));
5367 align
= TYPE_ALIGN_UNIT (elem_type
);
5372 TREE_TYPE (data_ref
)
5373 = build_aligned_type (TREE_TYPE (data_ref
),
5374 TYPE_ALIGN (elem_type
));
5375 misalign
= DR_MISALIGNMENT (first_dr
);
5377 if (dataref_offset
== NULL_TREE
)
5378 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5382 && dt
!= vect_constant_def
5383 && dt
!= vect_external_def
)
5385 tree perm_mask
= perm_mask_for_reverse (vectype
);
5387 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5389 tree new_temp
= make_ssa_name (perm_dest
, NULL
);
5391 /* Generate the permute statement. */
5393 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, new_temp
,
5394 vec_oprnd
, vec_oprnd
,
5396 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5398 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5399 vec_oprnd
= new_temp
;
5402 /* Arguments are ready. Create the new vector stmt. */
5403 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5404 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5409 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5417 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5419 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5420 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5424 dr_chain
.release ();
5426 result_chain
.release ();
5427 vec_oprnds
.release ();
5432 /* Given a vector type VECTYPE and permutation SEL returns
5433 the VECTOR_CST mask that implements the permutation of the
5434 vector elements. If that is impossible to do, returns NULL. */
5437 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
5439 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5442 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5444 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
5447 mask_elt_type
= lang_hooks
.types
.type_for_mode
5448 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5449 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5451 mask_elts
= XALLOCAVEC (tree
, nunits
);
5452 for (i
= nunits
- 1; i
>= 0; i
--)
5453 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5454 mask_vec
= build_vector (mask_type
, mask_elts
);
5459 /* Given a vector variable X and Y, that was generated for the scalar
5460 STMT, generate instructions to permute the vector elements of X and Y
5461 using permutation mask MASK_VEC, insert them at *GSI and return the
5462 permuted vector variable. */
5465 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5466 gimple_stmt_iterator
*gsi
)
5468 tree vectype
= TREE_TYPE (x
);
5469 tree perm_dest
, data_ref
;
5472 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5473 data_ref
= make_ssa_name (perm_dest
, NULL
);
5475 /* Generate the permute statement. */
5476 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5478 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5483 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5484 inserting them on the loops preheader edge. Returns true if we
5485 were successful in doing so (and thus STMT can be moved then),
5486 otherwise returns false. */
5489 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5495 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5497 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5498 if (!gimple_nop_p (def_stmt
)
5499 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5501 /* Make sure we don't need to recurse. While we could do
5502 so in simple cases when there are more complex use webs
5503 we don't have an easy way to preserve stmt order to fulfil
5504 dependencies within them. */
5507 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5509 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5511 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5512 if (!gimple_nop_p (def_stmt2
)
5513 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5523 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5525 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5526 if (!gimple_nop_p (def_stmt
)
5527 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5529 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5530 gsi_remove (&gsi
, false);
5531 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5538 /* vectorizable_load.
5540 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5547 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5548 slp_tree slp_node
, slp_instance slp_node_instance
)
5551 tree vec_dest
= NULL
;
5552 tree data_ref
= NULL
;
5553 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5554 stmt_vec_info prev_stmt_info
;
5555 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5556 struct loop
*loop
= NULL
;
5557 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5558 bool nested_in_vect_loop
= false;
5559 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5560 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5563 enum machine_mode mode
;
5564 gimple new_stmt
= NULL
;
5566 enum dr_alignment_support alignment_support_scheme
;
5567 tree dataref_ptr
= NULL_TREE
;
5568 tree dataref_offset
= NULL_TREE
;
5569 gimple ptr_incr
= NULL
;
5570 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5572 int i
, j
, group_size
, group_gap
;
5573 tree msq
= NULL_TREE
, lsq
;
5574 tree offset
= NULL_TREE
;
5575 tree realignment_token
= NULL_TREE
;
5577 vec
<tree
> dr_chain
= vNULL
;
5578 bool grouped_load
= false;
5579 bool load_lanes_p
= false;
5582 bool negative
= false;
5583 bool compute_in_loop
= false;
5584 struct loop
*at_loop
;
5586 bool slp
= (slp_node
!= NULL
);
5587 bool slp_perm
= false;
5588 enum tree_code code
;
5589 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5592 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5593 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5594 int gather_scale
= 1;
5595 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5599 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5600 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5601 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5606 /* Multiple types in SLP are handled by creating the appropriate number of
5607 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5609 if (slp
|| PURE_SLP_STMT (stmt_info
))
5612 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5614 gcc_assert (ncopies
>= 1);
5616 /* FORNOW. This restriction should be relaxed. */
5617 if (nested_in_vect_loop
&& ncopies
> 1)
5619 if (dump_enabled_p ())
5620 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5621 "multiple types in nested loop.\n");
5625 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5628 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5631 /* Is vectorizable load? */
5632 if (!is_gimple_assign (stmt
))
5635 scalar_dest
= gimple_assign_lhs (stmt
);
5636 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5639 code
= gimple_assign_rhs_code (stmt
);
5640 if (code
!= ARRAY_REF
5641 && code
!= BIT_FIELD_REF
5642 && code
!= INDIRECT_REF
5643 && code
!= COMPONENT_REF
5644 && code
!= IMAGPART_EXPR
5645 && code
!= REALPART_EXPR
5647 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5650 if (!STMT_VINFO_DATA_REF (stmt_info
))
5653 elem_type
= TREE_TYPE (vectype
);
5654 mode
= TYPE_MODE (vectype
);
5656 /* FORNOW. In some cases can vectorize even if data-type not supported
5657 (e.g. - data copies). */
5658 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5660 if (dump_enabled_p ())
5661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5662 "Aligned load, but unsupported type.\n");
5666 /* Check if the load is a part of an interleaving chain. */
5667 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5669 grouped_load
= true;
5671 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5673 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5674 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5676 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5677 if (vect_load_lanes_supported (vectype
, group_size
))
5678 load_lanes_p
= true;
5679 else if (!vect_grouped_load_supported (vectype
, group_size
))
5685 if (STMT_VINFO_GATHER_P (stmt_info
))
5689 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5690 &gather_off
, &gather_scale
);
5691 gcc_assert (gather_decl
);
5692 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5693 &def_stmt
, &def
, &gather_dt
,
5694 &gather_off_vectype
))
5696 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5698 "gather index use not simple.\n");
5702 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5706 negative
= tree_int_cst_compare (nested_in_vect_loop
5707 ? STMT_VINFO_DR_STEP (stmt_info
)
5709 size_zero_node
) < 0;
5710 if (negative
&& ncopies
> 1)
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5714 "multiple types with negative step.\n");
5722 if (dump_enabled_p ())
5723 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5724 "negative step for group load not supported"
5728 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5729 if (alignment_support_scheme
!= dr_aligned
5730 && alignment_support_scheme
!= dr_unaligned_supported
)
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5734 "negative step but alignment required.\n");
5737 if (!perm_mask_for_reverse (vectype
))
5739 if (dump_enabled_p ())
5740 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5741 "negative step and reversing not supported."
5748 if (!vec_stmt
) /* transformation not required. */
5750 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
5751 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_NOTE
, vect_location
,
5757 "transform load. ncopies = %d\n", ncopies
);
5761 ensure_base_align (stmt_info
, dr
);
5763 if (STMT_VINFO_GATHER_P (stmt_info
))
5765 tree vec_oprnd0
= NULL_TREE
, op
;
5766 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
5767 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
5768 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
5769 edge pe
= loop_preheader_edge (loop
);
5772 enum { NARROW
, NONE
, WIDEN
} modifier
;
5773 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
5775 if (nunits
== gather_off_nunits
)
5777 else if (nunits
== gather_off_nunits
/ 2)
5779 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
5782 for (i
= 0; i
< gather_off_nunits
; ++i
)
5783 sel
[i
] = i
| nunits
;
5785 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
5786 gcc_assert (perm_mask
!= NULL_TREE
);
5788 else if (nunits
== gather_off_nunits
* 2)
5790 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
5793 for (i
= 0; i
< nunits
; ++i
)
5794 sel
[i
] = i
< gather_off_nunits
5795 ? i
: i
+ nunits
- gather_off_nunits
;
5797 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
5798 gcc_assert (perm_mask
!= NULL_TREE
);
5804 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
5805 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5806 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5807 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5808 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
5809 scaletype
= TREE_VALUE (arglist
);
5810 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
5812 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5814 ptr
= fold_convert (ptrtype
, gather_base
);
5815 if (!is_gimple_min_invariant (ptr
))
5817 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
5818 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
5819 gcc_assert (!new_bb
);
5822 /* Currently we support only unconditional gather loads,
5823 so mask should be all ones. */
5824 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
5825 mask
= build_int_cst (masktype
, -1);
5826 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
5828 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
5829 mask
= build_vector_from_val (masktype
, mask
);
5830 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5832 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
5836 for (j
= 0; j
< 6; ++j
)
5838 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
5839 mask
= build_real (TREE_TYPE (masktype
), r
);
5840 mask
= build_vector_from_val (masktype
, mask
);
5841 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
5846 scale
= build_int_cst (scaletype
, gather_scale
);
5848 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
5849 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
5850 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
5854 for (j
= 0; j
< 6; ++j
)
5856 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
5857 merge
= build_real (TREE_TYPE (rettype
), r
);
5861 merge
= build_vector_from_val (rettype
, merge
);
5862 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
5864 prev_stmt_info
= NULL
;
5865 for (j
= 0; j
< ncopies
; ++j
)
5867 if (modifier
== WIDEN
&& (j
& 1))
5868 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
5869 perm_mask
, stmt
, gsi
);
5872 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
5875 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
5877 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
5879 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
5880 == TYPE_VECTOR_SUBPARTS (idxtype
));
5881 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
5882 var
= make_ssa_name (var
, NULL
);
5883 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
5885 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
5887 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5892 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
5894 if (!useless_type_conversion_p (vectype
, rettype
))
5896 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
5897 == TYPE_VECTOR_SUBPARTS (rettype
));
5898 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
5899 op
= make_ssa_name (var
, new_stmt
);
5900 gimple_call_set_lhs (new_stmt
, op
);
5901 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5902 var
= make_ssa_name (vec_dest
, NULL
);
5903 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
5905 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
5910 var
= make_ssa_name (vec_dest
, new_stmt
);
5911 gimple_call_set_lhs (new_stmt
, var
);
5914 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5916 if (modifier
== NARROW
)
5923 var
= permute_vec_elements (prev_res
, var
,
5924 perm_mask
, stmt
, gsi
);
5925 new_stmt
= SSA_NAME_DEF_STMT (var
);
5928 if (prev_stmt_info
== NULL
)
5929 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5931 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5932 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5936 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
5938 gimple_stmt_iterator incr_gsi
;
5944 vec
<constructor_elt
, va_gc
> *v
= NULL
;
5945 gimple_seq stmts
= NULL
;
5946 tree stride_base
, stride_step
, alias_off
;
5948 gcc_assert (!nested_in_vect_loop
);
5951 = fold_build_pointer_plus
5952 (unshare_expr (DR_BASE_ADDRESS (dr
)),
5953 size_binop (PLUS_EXPR
,
5954 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
5955 convert_to_ptrofftype (DR_INIT (dr
))));
5956 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
5958 /* For a load with loop-invariant (but other than power-of-2)
5959 stride (i.e. not a grouped access) like so:
5961 for (i = 0; i < n; i += stride)
5964 we generate a new induction variable and new accesses to
5965 form a new vector (or vectors, depending on ncopies):
5967 for (j = 0; ; j += VF*stride)
5969 tmp2 = array[j + stride];
5971 vectemp = {tmp1, tmp2, ...}
5974 ivstep
= stride_step
;
5975 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5976 build_int_cst (TREE_TYPE (ivstep
), vf
));
5978 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5980 create_iv (stride_base
, ivstep
, NULL
,
5981 loop
, &incr_gsi
, insert_after
,
5983 incr
= gsi_stmt (incr_gsi
);
5984 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
5986 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5988 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5990 prev_stmt_info
= NULL
;
5991 running_off
= offvar
;
5992 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
5993 for (j
= 0; j
< ncopies
; j
++)
5997 vec_alloc (v
, nunits
);
5998 for (i
= 0; i
< nunits
; i
++)
6000 tree newref
, newoff
;
6002 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
6003 running_off
, alias_off
);
6005 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6008 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6009 newoff
= copy_ssa_name (running_off
, NULL
);
6010 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
6011 running_off
, stride_step
);
6012 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6014 running_off
= newoff
;
6017 vec_inv
= build_constructor (vectype
, v
);
6018 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6019 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6022 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6024 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6025 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6032 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6034 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6035 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6036 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6038 /* Check if the chain of loads is already vectorized. */
6039 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6040 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6041 ??? But we can only do so if there is exactly one
6042 as we have no way to get at the rest. Leave the CSE
6044 ??? With the group load eventually participating
6045 in multiple different permutations (having multiple
6046 slp nodes which refer to the same group) the CSE
6047 is even wrong code. See PR56270. */
6050 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6053 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6054 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6056 /* VEC_NUM is the number of vect stmts to be created for this group. */
6059 grouped_load
= false;
6060 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6061 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6063 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6067 vec_num
= group_size
;
6075 group_size
= vec_num
= 1;
6079 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6080 gcc_assert (alignment_support_scheme
);
6081 /* Targets with load-lane instructions must not require explicit
6083 gcc_assert (!load_lanes_p
6084 || alignment_support_scheme
== dr_aligned
6085 || alignment_support_scheme
== dr_unaligned_supported
);
6087 /* In case the vectorization factor (VF) is bigger than the number
6088 of elements that we can fit in a vectype (nunits), we have to generate
6089 more than one vector stmt - i.e - we need to "unroll" the
6090 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6091 from one copy of the vector stmt to the next, in the field
6092 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6093 stages to find the correct vector defs to be used when vectorizing
6094 stmts that use the defs of the current stmt. The example below
6095 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6096 need to create 4 vectorized stmts):
6098 before vectorization:
6099 RELATED_STMT VEC_STMT
6103 step 1: vectorize stmt S1:
6104 We first create the vector stmt VS1_0, and, as usual, record a
6105 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6106 Next, we create the vector stmt VS1_1, and record a pointer to
6107 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6108 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6110 RELATED_STMT VEC_STMT
6111 VS1_0: vx0 = memref0 VS1_1 -
6112 VS1_1: vx1 = memref1 VS1_2 -
6113 VS1_2: vx2 = memref2 VS1_3 -
6114 VS1_3: vx3 = memref3 - -
6115 S1: x = load - VS1_0
6118 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6119 information we recorded in RELATED_STMT field is used to vectorize
6122 /* In case of interleaving (non-unit grouped access):
6129 Vectorized loads are created in the order of memory accesses
6130 starting from the access of the first stmt of the chain:
6133 VS2: vx1 = &base + vec_size*1
6134 VS3: vx3 = &base + vec_size*2
6135 VS4: vx4 = &base + vec_size*3
6137 Then permutation statements are generated:
6139 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6140 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6143 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6144 (the order of the data-refs in the output of vect_permute_load_chain
6145 corresponds to the order of scalar stmts in the interleaving chain - see
6146 the documentation of vect_permute_load_chain()).
6147 The generation of permutation stmts and recording them in
6148 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6150 In case of both multiple types and interleaving, the vector loads and
6151 permutation stmts above are created for every copy. The result vector
6152 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6153 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6155 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6156 on a target that supports unaligned accesses (dr_unaligned_supported)
6157 we generate the following code:
6161 p = p + indx * vectype_size;
6166 Otherwise, the data reference is potentially unaligned on a target that
6167 does not support unaligned accesses (dr_explicit_realign_optimized) -
6168 then generate the following code, in which the data in each iteration is
6169 obtained by two vector loads, one from the previous iteration, and one
6170 from the current iteration:
6172 msq_init = *(floor(p1))
6173 p2 = initial_addr + VS - 1;
6174 realignment_token = call target_builtin;
6177 p2 = p2 + indx * vectype_size
6179 vec_dest = realign_load (msq, lsq, realignment_token)
6184 /* If the misalignment remains the same throughout the execution of the
6185 loop, we can create the init_addr and permutation mask at the loop
6186 preheader. Otherwise, it needs to be created inside the loop.
6187 This can only occur when vectorizing memory accesses in the inner-loop
6188 nested within an outer-loop that is being vectorized. */
6190 if (nested_in_vect_loop
6191 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6192 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6194 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6195 compute_in_loop
= true;
6198 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6199 || alignment_support_scheme
== dr_explicit_realign
)
6200 && !compute_in_loop
)
6202 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6203 alignment_support_scheme
, NULL_TREE
,
6205 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6207 phi
= SSA_NAME_DEF_STMT (msq
);
6208 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6215 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6218 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6220 aggr_type
= vectype
;
6222 prev_stmt_info
= NULL
;
6223 for (j
= 0; j
< ncopies
; j
++)
6225 /* 1. Create the vector or array pointer update chain. */
6228 bool simd_lane_access_p
6229 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6230 if (simd_lane_access_p
6231 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6232 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6233 && integer_zerop (DR_OFFSET (first_dr
))
6234 && integer_zerop (DR_INIT (first_dr
))
6235 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6236 get_alias_set (DR_REF (first_dr
)))
6237 && (alignment_support_scheme
== dr_aligned
6238 || alignment_support_scheme
== dr_unaligned_supported
))
6240 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6241 dataref_offset
= build_int_cst (reference_alias_ptr_type
6242 (DR_REF (first_dr
)), 0);
6247 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6248 offset
, &dummy
, gsi
, &ptr_incr
,
6249 simd_lane_access_p
, &inv_p
);
6251 else if (dataref_offset
)
6252 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6253 TYPE_SIZE_UNIT (aggr_type
));
6255 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6256 TYPE_SIZE_UNIT (aggr_type
));
6258 if (grouped_load
|| slp_perm
)
6259 dr_chain
.create (vec_num
);
6265 vec_array
= create_vector_array (vectype
, vec_num
);
6268 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6269 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6270 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6271 gimple_call_set_lhs (new_stmt
, vec_array
);
6272 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6274 /* Extract each vector into an SSA_NAME. */
6275 for (i
= 0; i
< vec_num
; i
++)
6277 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6279 dr_chain
.quick_push (new_temp
);
6282 /* Record the mapping between SSA_NAMEs and statements. */
6283 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6287 for (i
= 0; i
< vec_num
; i
++)
6290 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6293 /* 2. Create the vector-load in the loop. */
6294 switch (alignment_support_scheme
)
6297 case dr_unaligned_supported
:
6299 unsigned int align
, misalign
;
6302 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6305 : build_int_cst (reference_alias_ptr_type
6306 (DR_REF (first_dr
)), 0));
6307 align
= TYPE_ALIGN_UNIT (vectype
);
6308 if (alignment_support_scheme
== dr_aligned
)
6310 gcc_assert (aligned_access_p (first_dr
));
6313 else if (DR_MISALIGNMENT (first_dr
) == -1)
6315 TREE_TYPE (data_ref
)
6316 = build_aligned_type (TREE_TYPE (data_ref
),
6317 TYPE_ALIGN (elem_type
));
6318 align
= TYPE_ALIGN_UNIT (elem_type
);
6323 TREE_TYPE (data_ref
)
6324 = build_aligned_type (TREE_TYPE (data_ref
),
6325 TYPE_ALIGN (elem_type
));
6326 misalign
= DR_MISALIGNMENT (first_dr
);
6328 if (dataref_offset
== NULL_TREE
)
6329 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6333 case dr_explicit_realign
:
6338 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
6340 if (compute_in_loop
)
6341 msq
= vect_setup_realignment (first_stmt
, gsi
,
6343 dr_explicit_realign
,
6346 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
6347 new_stmt
= gimple_build_assign_with_ops
6348 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
6350 (TREE_TYPE (dataref_ptr
),
6351 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6352 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6354 = build2 (MEM_REF
, vectype
, ptr
,
6355 build_int_cst (reference_alias_ptr_type
6356 (DR_REF (first_dr
)), 0));
6357 vec_dest
= vect_create_destination_var (scalar_dest
,
6359 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6360 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6361 gimple_assign_set_lhs (new_stmt
, new_temp
);
6362 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6363 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6364 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6367 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
6368 TYPE_SIZE_UNIT (elem_type
));
6369 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6370 new_stmt
= gimple_build_assign_with_ops
6371 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
6374 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6375 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6376 gimple_assign_set_lhs (new_stmt
, ptr
);
6377 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6379 = build2 (MEM_REF
, vectype
, ptr
,
6380 build_int_cst (reference_alias_ptr_type
6381 (DR_REF (first_dr
)), 0));
6384 case dr_explicit_realign_optimized
:
6385 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
6386 new_stmt
= gimple_build_assign_with_ops
6387 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
6389 (TREE_TYPE (dataref_ptr
),
6390 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6391 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6393 = build2 (MEM_REF
, vectype
, new_temp
,
6394 build_int_cst (reference_alias_ptr_type
6395 (DR_REF (first_dr
)), 0));
6400 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6401 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6402 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6403 gimple_assign_set_lhs (new_stmt
, new_temp
);
6404 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6406 /* 3. Handle explicit realignment if necessary/supported.
6408 vec_dest = realign_load (msq, lsq, realignment_token) */
6409 if (alignment_support_scheme
== dr_explicit_realign_optimized
6410 || alignment_support_scheme
== dr_explicit_realign
)
6412 lsq
= gimple_assign_lhs (new_stmt
);
6413 if (!realignment_token
)
6414 realignment_token
= dataref_ptr
;
6415 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6417 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
6420 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6421 gimple_assign_set_lhs (new_stmt
, new_temp
);
6422 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6424 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6427 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6428 add_phi_arg (phi
, lsq
,
6429 loop_latch_edge (containing_loop
),
6435 /* 4. Handle invariant-load. */
6436 if (inv_p
&& !bb_vinfo
)
6438 gcc_assert (!grouped_load
);
6439 /* If we have versioned for aliasing or the loop doesn't
6440 have any data dependencies that would preclude this,
6441 then we are sure this is a loop invariant load and
6442 thus we can insert it on the preheader edge. */
6443 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6444 && !nested_in_vect_loop
6445 && hoist_defs_of_uses (stmt
, loop
))
6447 if (dump_enabled_p ())
6449 dump_printf_loc (MSG_NOTE
, vect_location
,
6450 "hoisting out of the vectorized "
6452 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6453 dump_printf (MSG_NOTE
, "\n");
6455 tree tem
= copy_ssa_name (scalar_dest
, NULL
);
6456 gsi_insert_on_edge_immediate
6457 (loop_preheader_edge (loop
),
6458 gimple_build_assign (tem
,
6460 (gimple_assign_rhs1 (stmt
))));
6461 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6465 gimple_stmt_iterator gsi2
= *gsi
;
6467 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6470 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6471 set_vinfo_for_stmt (new_stmt
,
6472 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6478 tree perm_mask
= perm_mask_for_reverse (vectype
);
6479 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6480 perm_mask
, stmt
, gsi
);
6481 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6484 /* Collect vector loads and later create their permutation in
6485 vect_transform_grouped_load (). */
6486 if (grouped_load
|| slp_perm
)
6487 dr_chain
.quick_push (new_temp
);
6489 /* Store vector loads in the corresponding SLP_NODE. */
6490 if (slp
&& !slp_perm
)
6491 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6493 /* Bump the vector pointer to account for a gap. */
6494 if (slp
&& group_gap
!= 0)
6496 tree bump
= size_binop (MULT_EXPR
,
6497 TYPE_SIZE_UNIT (elem_type
),
6498 size_int (group_gap
));
6499 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6504 if (slp
&& !slp_perm
)
6509 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6510 slp_node_instance
, false))
6512 dr_chain
.release ();
6521 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6522 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6527 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6529 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6530 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6533 dr_chain
.release ();
6539 /* Function vect_is_simple_cond.
6542 LOOP - the loop that is being vectorized.
6543 COND - Condition that is checked for simple use.
6546 *COMP_VECTYPE - the vector type for the comparison.
6548 Returns whether a COND can be vectorized. Checks whether
6549 condition operands are supportable using vect_is_simple_use. */
/* vect_is_simple_cond: decide whether the comparison COND can be
   vectorized.  Each comparison operand must be either an SSA_NAME whose
   use is "simple" (checked with vect_is_simple_use_1, which also reports
   the operand's vector type), or a scalar constant (INTEGER_CST,
   REAL_CST or FIXED_CST).  On success *COMP_VECTYPE receives the vector
   type of the comparison.
   NOTE(review): this chunk is a lossy extraction — the embedded original
   line numbers jump, so the failure `return false;` paths and some local
   declarations are not visible here.  */
6552 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6553 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6557 enum vect_def_type dt
;
6558 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Only comparison expressions are handled.  */
6560 if (!COMPARISON_CLASS_P (cond
))
/* Split the comparison into its two operands.  */
6563 lhs
= TREE_OPERAND (cond
, 0);
6564 rhs
= TREE_OPERAND (cond
, 1);
/* LHS: an SSA_NAME must be a simple use (its vector type is returned in
   VECTYPE1); otherwise only scalar constants are acceptable.  */
6566 if (TREE_CODE (lhs
) == SSA_NAME
)
6568 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6569 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6570 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6573 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6574 && TREE_CODE (lhs
) != FIXED_CST
)
/* RHS: same checks, vector type recorded in VECTYPE2.  */
6577 if (TREE_CODE (rhs
) == SSA_NAME
)
6579 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6580 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6581 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6584 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6585 && TREE_CODE (rhs
) != FIXED_CST
)
/* Report the comparison's vector type: the LHS-derived one if any,
   otherwise the RHS-derived one (may be NULL_TREE when both operands
   are constants).  */
6588 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6592 /* vectorizable_condition.
6594 Check if STMT is conditional modify expression that can be vectorized.
6595 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6596 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6599 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6600 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6601 else clause if it is 2).
6603 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* vectorizable_condition: check whether STMT is a vectorizable COND_EXPR
   assignment and, when VEC_STMT is non-NULL, transform it into one or
   more VEC_COND_EXPR vector statements inserted at GSI.  When STMT is a
   nested-cycle reduction, REDUC_DEF supplies the vector def to be used
   in the then-clause (REDUC_INDEX == 1) or else-clause (REDUC_INDEX == 2).
   NOTE(review): lossy extraction — several original lines (returns,
   braces, some declarations such as `ncopies`, `vec_cmp_type`) are not
   visible in this chunk.  */
6606 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6607 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6610 tree scalar_dest
= NULL_TREE
;
6611 tree vec_dest
= NULL_TREE
;
6612 tree cond_expr
, then_clause
, else_clause
;
6613 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6614 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6615 tree comp_vectype
= NULL_TREE
;
6616 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6617 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6618 tree vec_compare
, vec_cond_expr
;
6620 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6622 enum vect_def_type dt
, dts
[4];
6623 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6625 enum tree_code code
;
6626 stmt_vec_info prev_stmt_info
= NULL
;
6628 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6629 vec
<tree
> vec_oprnds0
= vNULL
;
6630 vec
<tree
> vec_oprnds1
= vNULL
;
6631 vec
<tree
> vec_oprnds2
= vNULL
;
6632 vec
<tree
> vec_oprnds3
= vNULL
;
/* NCOPIES: 1 for SLP, otherwise vectorization factor / nunits
   (the else-branch appears elided by the extraction).  */
6635 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6638 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6640 gcc_assert (ncopies
>= 1);
/* Multiple copies of a reduction condition are not supported yet.  */
6641 if (reduc_index
&& ncopies
> 1)
6642 return false; /* FORNOW */
6644 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6647 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6650 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6651 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6655 /* FORNOW: not yet supported. */
6656 if (STMT_VINFO_LIVE_P (stmt_info
))
6658 if (dump_enabled_p ())
6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6660 "value used after loop.\n");
6664 /* Is vectorizable conditional operation? */
6665 if (!is_gimple_assign (stmt
))
6668 code
= gimple_assign_rhs_code (stmt
);
6670 if (code
!= COND_EXPR
)
/* Pick apart the COND_EXPR: condition, then-value, else-value.  */
6673 cond_expr
= gimple_assign_rhs1 (stmt
);
6674 then_clause
= gimple_assign_rhs2 (stmt
);
6675 else_clause
= gimple_assign_rhs3 (stmt
);
6677 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
/* The then/else clauses must be simple uses or scalar constants,
   mirroring the operand checks in vect_is_simple_cond.  */
6682 if (TREE_CODE (then_clause
) == SSA_NAME
)
6684 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
6685 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6686 &then_def_stmt
, &def
, &dt
))
6689 else if (TREE_CODE (then_clause
) != INTEGER_CST
6690 && TREE_CODE (then_clause
) != REAL_CST
6691 && TREE_CODE (then_clause
) != FIXED_CST
)
6694 if (TREE_CODE (else_clause
) == SSA_NAME
)
6696 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
6697 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
6698 &else_def_stmt
, &def
, &dt
))
6701 else if (TREE_CODE (else_clause
) != INTEGER_CST
6702 && TREE_CODE (else_clause
) != REAL_CST
6703 && TREE_CODE (else_clause
) != FIXED_CST
)
/* Build a signed integer vector type of the same element width to hold
   the comparison result.  */
6706 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
6707 /* The result of a vector comparison should be signed type. */
6708 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
6709 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
6710 if (vec_cmp_type
== NULL_TREE
)
/* Analysis-only call: record the stmt kind and ask the target whether
   it can expand this VEC_COND_EXPR.  */
6715 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
6716 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* Transformation.  */
6723 vec_oprnds0
.create (1);
6724 vec_oprnds1
.create (1);
6725 vec_oprnds2
.create (1);
6726 vec_oprnds3
.create (1);
6730 scalar_dest
= gimple_assign_lhs (stmt
);
6731 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6733 /* Handle cond expr. */
6734 for (j
= 0; j
< ncopies
; j
++)
6736 gimple new_stmt
= NULL
;
/* SLP path: get all four operands' vector defs in one shot.  */
6741 auto_vec
<tree
, 4> ops
;
6742 auto_vec
<vec
<tree
>, 4> vec_defs
;
6744 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
6745 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
6746 ops
.safe_push (then_clause
);
6747 ops
.safe_push (else_clause
);
6748 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
6749 vec_oprnds3
= vec_defs
.pop ();
6750 vec_oprnds2
= vec_defs
.pop ();
6751 vec_oprnds1
= vec_defs
.pop ();
6752 vec_oprnds0
= vec_defs
.pop ();
6755 vec_defs
.release ();
/* First copy (j == 0), non-SLP: fetch each operand's vector def and
   record its def-type in dts[] for the later copies.  */
6761 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
6763 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
6764 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
6767 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
6769 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
6770 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
/* For nested-cycle reductions the then/else vector def is the supplied
   REDUC_DEF, selected by REDUC_INDEX.  */
6771 if (reduc_index
== 1)
6772 vec_then_clause
= reduc_def
;
6775 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
6777 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
6778 NULL
, >emp
, &def
, &dts
[2]);
6780 if (reduc_index
== 2)
6781 vec_else_clause
= reduc_def
;
6784 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
6786 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
6787 NULL
, >emp
, &def
, &dts
[3]);
/* Subsequent copies (j > 0): derive the defs from the previous copy.  */
6793 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
6794 vec_oprnds0
.pop ());
6795 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
6796 vec_oprnds1
.pop ());
6797 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
6798 vec_oprnds2
.pop ());
6799 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
6800 vec_oprnds3
.pop ());
6805 vec_oprnds0
.quick_push (vec_cond_lhs
);
6806 vec_oprnds1
.quick_push (vec_cond_rhs
);
6807 vec_oprnds2
.quick_push (vec_then_clause
);
6808 vec_oprnds3
.quick_push (vec_else_clause
);
6811 /* Arguments are ready. Create the new vector stmt. */
6812 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
6814 vec_cond_rhs
= vec_oprnds1
[i
];
6815 vec_then_clause
= vec_oprnds2
[i
];
6816 vec_else_clause
= vec_oprnds3
[i
];
/* comparison (in the signed vec_cmp_type) + VEC_COND_EXPR select.  */
6818 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
6819 vec_cond_lhs
, vec_cond_rhs
);
6820 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
6821 vec_compare
, vec_then_clause
, vec_else_clause
);
6823 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
6824 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6825 gimple_assign_set_lhs (new_stmt
, new_temp
);
6826 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6828 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: first copy goes into STMT_VINFO_VEC_STMT/VEC_STMT,
   later copies hang off STMT_VINFO_RELATED_STMT of the previous one.  */
6835 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6837 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6839 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the operand vectors (SLP path may have reassigned them).  */
6842 vec_oprnds0
.release ();
6843 vec_oprnds1
.release ();
6844 vec_oprnds2
.release ();
6845 vec_oprnds3
.release ();
6851 /* Make sure the statement is vectorizable. */
/* vect_analyze_stmt: make sure STMT is vectorizable.  Rejects stmts with
   volatile operands, analyzes pattern statements (and their def
   sequences) in place of / in addition to irrelevant originals,
   dispatches to the vectorizable_* analysis routines (NULL vec_stmt
   arguments == analysis only), and finally checks live stmts.  Sets
   *NEED_TO_VECTORIZE when a relevant stmt is found; NODE is the SLP node
   when called from SLP analysis.
   NOTE(review): lossy extraction — `return false;`/`return true;` lines,
   braces and some declarations (e.g. `bool ok`) are elided here.  */
6854 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
6856 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6857 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6858 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
6860 tree scalar_type
, vectype
;
6861 gimple pattern_stmt
;
6862 gimple_seq pattern_def_seq
;
6864 if (dump_enabled_p ())
6866 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
6867 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6868 dump_printf (MSG_NOTE
, "\n");
/* Volatile operands are never vectorized.  */
6871 if (gimple_has_volatile_ops (stmt
))
6873 if (dump_enabled_p ())
6874 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6875 "not vectorized: stmt has volatile operands\n");
6880 /* Skip stmts that do not need to be vectorized. In loops this is expected
6882 - the COND_EXPR which is the loop exit condition
6883 - any LABEL_EXPRs in the loop
6884 - computations that are used only for array indexing or loop control.
6885 In basic blocks we only analyze statements that are a part of some SLP
6886 instance, therefore, all the statements are relevant.
6888 Pattern statement needs to be analyzed instead of the original statement
6889 if the original statement is not relevant. Otherwise, we analyze both
6890 statements. In basic blocks we are called from some SLP instance
6891 traversal, don't analyze pattern stmts instead, the pattern stmts
6892 already will be part of SLP instance. */
6894 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
6895 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
6896 && !STMT_VINFO_LIVE_P (stmt_info
))
6898 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6900 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6901 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6903 /* Analyze PATTERN_STMT instead of the original stmt. */
6904 stmt
= pattern_stmt
;
6905 stmt_info
= vinfo_for_stmt (pattern_stmt
);
6906 if (dump_enabled_p ())
6908 dump_printf_loc (MSG_NOTE
, vect_location
,
6909 "==> examining pattern statement: ");
6910 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6911 dump_printf (MSG_NOTE
, "\n");
6916 if (dump_enabled_p ())
6917 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
/* STMT itself is relevant; if it also has a relevant/live pattern stmt,
   recursively analyze that pattern stmt as well.  */
6922 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6925 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
6926 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
6928 /* Analyze PATTERN_STMT too. */
6929 if (dump_enabled_p ())
6931 dump_printf_loc (MSG_NOTE
, vect_location
,
6932 "==> examining pattern statement: ");
6933 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6934 dump_printf (MSG_NOTE
, "\n");
6937 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
/* A pattern stmt may carry a sequence of auxiliary def stmts; analyze
   each relevant/live one recursively too.  */
6941 if (is_pattern_stmt_p (stmt_info
)
6943 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
6945 gimple_stmt_iterator si
;
6947 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
6949 gimple pattern_def_stmt
= gsi_stmt (si
);
6950 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
6951 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
6953 /* Analyze def stmt of STMT if it's a pattern stmt. */
6954 if (dump_enabled_p ())
6956 dump_printf_loc (MSG_NOTE
, vect_location
,
6957 "==> examining pattern def statement: ");
6958 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
6959 dump_printf (MSG_NOTE
, "\n");
6962 if (!vect_analyze_stmt (pattern_def_stmt
,
6963 need_to_vectorize
, node
))
/* Sanity-check the def-type against the recorded relevance.  */
6969 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
6971 case vect_internal_def
:
6974 case vect_reduction_def
:
6975 case vect_nested_cycle
:
6976 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
6977 || relevance
== vect_used_in_outer_by_reduction
6978 || relevance
== vect_unused_in_scope
));
6981 case vect_induction_def
:
6982 case vect_constant_def
:
6983 case vect_external_def
:
6984 case vect_unknown_def_type
:
/* Basic-block SLP: compute and record the stmt's vector type from the
   scalar type of its lhs.  */
6991 gcc_assert (PURE_SLP_STMT (stmt_info
));
6993 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
6994 if (dump_enabled_p ())
6996 dump_printf_loc (MSG_NOTE
, vect_location
,
6997 "get vectype for scalar type: ");
6998 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
6999 dump_printf (MSG_NOTE
, "\n");
7002 vectype
= get_vectype_for_scalar_type (scalar_type
);
7005 if (dump_enabled_p ())
7007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7008 "not SLPed: unsupported data-type ");
7009 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7011 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7016 if (dump_enabled_p ())
7018 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7019 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7020 dump_printf (MSG_NOTE
, "\n");
7023 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7026 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7028 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7029 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7030 || (is_gimple_call (stmt
)
7031 && gimple_call_lhs (stmt
) == NULL_TREE
));
7032 *need_to_vectorize
= true;
/* Loop analysis: try every vectorizable_* routine with NULL vec_stmt
   (analysis only, no code generation).  */
7037 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7038 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7039 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7040 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7041 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7042 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7043 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7044 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7045 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7046 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7047 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7048 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
/* Basic-block SLP analysis: same dispatch, but passing the SLP node
   (no reduction/induction in basic blocks).  */
7052 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7053 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7054 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7055 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7056 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7057 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7058 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7059 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7060 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7065 if (dump_enabled_p ())
7067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7068 "not vectorized: relevant stmt not ");
7069 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7070 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7071 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7080 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7081 need extra handling, except for vectorizable reductions. */
7082 if (STMT_VINFO_LIVE_P (stmt_info
)
7083 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7084 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7088 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7091 "not vectorized: live stmt not ");
7092 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7093 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7094 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7104 /* Function vect_transform_stmt.
7106 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7109 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7110 bool *grouped_store
, slp_tree slp_node
,
7111 slp_instance slp_node_instance
)
7113 bool is_store
= false;
7114 gimple vec_stmt
= NULL
;
7115 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7118 switch (STMT_VINFO_TYPE (stmt_info
))
7120 case type_demotion_vec_info_type
:
7121 case type_promotion_vec_info_type
:
7122 case type_conversion_vec_info_type
:
7123 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7127 case induc_vec_info_type
:
7128 gcc_assert (!slp_node
);
7129 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7133 case shift_vec_info_type
:
7134 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7138 case op_vec_info_type
:
7139 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7143 case assignment_vec_info_type
:
7144 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7148 case load_vec_info_type
:
7149 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7154 case store_vec_info_type
:
7155 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7157 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7159 /* In case of interleaving, the whole chain is vectorized when the
7160 last store in the chain is reached. Store stmts before the last
7161 one are skipped, and there vec_stmt_info shouldn't be freed
7163 *grouped_store
= true;
7164 if (STMT_VINFO_VEC_STMT (stmt_info
))
7171 case condition_vec_info_type
:
7172 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7176 case call_vec_info_type
:
7177 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7178 stmt
= gsi_stmt (*gsi
);
7179 if (is_gimple_call (stmt
)
7180 && gimple_call_internal_p (stmt
)
7181 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7185 case call_simd_clone_vec_info_type
:
7186 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7187 stmt
= gsi_stmt (*gsi
);
7190 case reduc_vec_info_type
:
7191 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7196 if (!STMT_VINFO_LIVE_P (stmt_info
))
7198 if (dump_enabled_p ())
7199 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7200 "stmt not supported.\n");
7205 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7206 is being vectorized, but outside the immediately enclosing loop. */
7208 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7209 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7210 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7211 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7212 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7213 || STMT_VINFO_RELEVANT (stmt_info
) ==
7214 vect_used_in_outer_by_reduction
))
7216 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7217 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7218 imm_use_iterator imm_iter
;
7219 use_operand_p use_p
;
7223 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_NOTE
, vect_location
,
7225 "Record the vdef for outer-loop vectorization.\n");
7227 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7228 (to be used when vectorizing outer-loop stmts that use the DEF of
7230 if (gimple_code (stmt
) == GIMPLE_PHI
)
7231 scalar_dest
= PHI_RESULT (stmt
);
7233 scalar_dest
= gimple_assign_lhs (stmt
);
7235 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7237 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7239 exit_phi
= USE_STMT (use_p
);
7240 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7245 /* Handle stmts whose DEF is used outside the loop-nest that is
7246 being vectorized. */
7247 if (STMT_VINFO_LIVE_P (stmt_info
)
7248 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7250 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7255 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7261 /* Remove a group of stores (for SLP or interleaving), free their
7265 vect_remove_stores (gimple first_stmt
)
7267 gimple next
= first_stmt
;
7269 gimple_stmt_iterator next_si
;
7273 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7275 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7276 if (is_pattern_stmt_p (stmt_info
))
7277 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7278 /* Free the attached stmt_vec_info and remove the stmt. */
7279 next_si
= gsi_for_stmt (next
);
7280 unlink_stmt_vdef (next
);
7281 gsi_remove (&next_si
, true);
7282 release_defs (next
);
7283 free_stmt_vec_info (next
);
7289 /* Function new_stmt_vec_info.
7291 Create and initialize a new stmt_vec_info struct for STMT. */
7294 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7295 bb_vec_info bb_vinfo
)
7298 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7300 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7301 STMT_VINFO_STMT (res
) = stmt
;
7302 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7303 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7304 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7305 STMT_VINFO_LIVE_P (res
) = false;
7306 STMT_VINFO_VECTYPE (res
) = NULL
;
7307 STMT_VINFO_VEC_STMT (res
) = NULL
;
7308 STMT_VINFO_VECTORIZABLE (res
) = true;
7309 STMT_VINFO_IN_PATTERN_P (res
) = false;
7310 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7311 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7312 STMT_VINFO_DATA_REF (res
) = NULL
;
7314 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7315 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7316 STMT_VINFO_DR_INIT (res
) = NULL
;
7317 STMT_VINFO_DR_STEP (res
) = NULL
;
7318 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7320 if (gimple_code (stmt
) == GIMPLE_PHI
7321 && is_loop_header_bb_p (gimple_bb (stmt
)))
7322 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7324 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7326 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7327 STMT_SLP_TYPE (res
) = loop_vect
;
7328 GROUP_FIRST_ELEMENT (res
) = NULL
;
7329 GROUP_NEXT_ELEMENT (res
) = NULL
;
7330 GROUP_SIZE (res
) = 0;
7331 GROUP_STORE_COUNT (res
) = 0;
7332 GROUP_GAP (res
) = 0;
7333 GROUP_SAME_DR_STMT (res
) = NULL
;
7339 /* Create a hash table for stmt_vec_info. */
7342 init_stmt_vec_info_vec (void)
7344 gcc_assert (!stmt_vec_info_vec
.exists ());
7345 stmt_vec_info_vec
.create (50);
7349 /* Free hash table for stmt_vec_info. */
7352 free_stmt_vec_info_vec (void)
7356 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7358 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7359 gcc_assert (stmt_vec_info_vec
.exists ());
7360 stmt_vec_info_vec
.release ();
7364 /* Free stmt vectorization related info. */
7367 free_stmt_vec_info (gimple stmt
)
7369 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7374 /* Check if this statement has a related "pattern stmt"
7375 (introduced by the vectorizer during the pattern recognition
7376 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7378 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7380 stmt_vec_info patt_info
7381 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7384 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7387 gimple_stmt_iterator si
;
7388 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7389 free_stmt_vec_info (gsi_stmt (si
));
7391 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
7395 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7396 set_vinfo_for_stmt (stmt
, NULL
);
7401 /* Function get_vectype_for_scalar_type_and_size.
7403 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7407 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7409 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7410 enum machine_mode simd_mode
;
7411 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7418 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7419 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7422 /* For vector types of elements whose mode precision doesn't
7423 match their types precision we use a element type of mode
7424 precision. The vectorization routines will have to make sure
7425 they support the proper result truncation/extension.
7426 We also make sure to build vector types with INTEGER_TYPE
7427 component type only. */
7428 if (INTEGRAL_TYPE_P (scalar_type
)
7429 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7430 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7431 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7432 TYPE_UNSIGNED (scalar_type
));
7434 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7435 When the component mode passes the above test simply use a type
7436 corresponding to that mode. The theory is that any use that
7437 would cause problems with this will disable vectorization anyway. */
7438 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7439 && !INTEGRAL_TYPE_P (scalar_type
))
7440 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7442 /* We can't build a vector type of elements with alignment bigger than
7444 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7445 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7446 TYPE_UNSIGNED (scalar_type
));
7448 /* If we felt back to using the mode fail if there was
7449 no scalar type for it. */
7450 if (scalar_type
== NULL_TREE
)
7453 /* If no size was supplied use the mode the target prefers. Otherwise
7454 lookup a vector mode of the specified size. */
7456 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7458 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7459 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7463 vectype
= build_vector_type (scalar_type
, nunits
);
7465 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7466 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7472 unsigned int current_vector_size
;
7474 /* Function get_vectype_for_scalar_type.
7476 Returns the vector type corresponding to SCALAR_TYPE as supported
7480 get_vectype_for_scalar_type (tree scalar_type
)
7483 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7484 current_vector_size
);
7486 && current_vector_size
== 0)
7487 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7491 /* Function get_same_sized_vectype
7493 Returns a vector type corresponding to SCALAR_TYPE of size
7494 VECTOR_TYPE if supported by the target. */
7497 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7499 return get_vectype_for_scalar_type_and_size
7500 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7503 /* Function vect_is_simple_use.
7506 LOOP_VINFO - the vect info of the loop that is being vectorized.
7507 BB_VINFO - the vect info of the basic block that is being vectorized.
7508 OPERAND - operand of STMT in the loop or bb.
7509 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7511 Returns whether a stmt with OPERAND can be vectorized.
7512 For loops, supportable operands are constants, loop invariants, and operands
7513 that are defined by the current iteration of the loop. Unsupportable
7514 operands are those that are defined by a previous iteration of the loop (as
7515 is the case in reduction/induction computations).
7516 For basic blocks, supportable operands are constants and bb invariants.
7517 For now, operands defined outside the basic block are not supported. */
7520 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7521 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7522 tree
*def
, enum vect_def_type
*dt
)
7525 stmt_vec_info stmt_vinfo
;
7526 struct loop
*loop
= NULL
;
7529 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7534 if (dump_enabled_p ())
7536 dump_printf_loc (MSG_NOTE
, vect_location
,
7537 "vect_is_simple_use: operand ");
7538 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7539 dump_printf (MSG_NOTE
, "\n");
7542 if (CONSTANT_CLASS_P (operand
))
7544 *dt
= vect_constant_def
;
7548 if (is_gimple_min_invariant (operand
))
7551 *dt
= vect_external_def
;
7555 if (TREE_CODE (operand
) == PAREN_EXPR
)
7557 if (dump_enabled_p ())
7558 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7559 operand
= TREE_OPERAND (operand
, 0);
7562 if (TREE_CODE (operand
) != SSA_NAME
)
7564 if (dump_enabled_p ())
7565 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7570 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7571 if (*def_stmt
== NULL
)
7573 if (dump_enabled_p ())
7574 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7579 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7582 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7583 dump_printf (MSG_NOTE
, "\n");
7586 /* Empty stmt is expected only in case of a function argument.
7587 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7588 if (gimple_nop_p (*def_stmt
))
7591 *dt
= vect_external_def
;
7595 bb
= gimple_bb (*def_stmt
);
7597 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7598 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7599 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7600 *dt
= vect_external_def
;
7603 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7604 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7607 if (*dt
== vect_unknown_def_type
7609 && *dt
== vect_double_reduction_def
7610 && gimple_code (stmt
) != GIMPLE_PHI
))
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7614 "Unsupported pattern.\n");
7618 if (dump_enabled_p ())
7619 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
7621 switch (gimple_code (*def_stmt
))
7624 *def
= gimple_phi_result (*def_stmt
);
7628 *def
= gimple_assign_lhs (*def_stmt
);
7632 *def
= gimple_call_lhs (*def_stmt
);
7637 if (dump_enabled_p ())
7638 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7639 "unsupported defining stmt:\n");
7646 /* Function vect_is_simple_use_1.
7648 Same as vect_is_simple_use_1 but also determines the vector operand
7649 type of OPERAND and stores it to *VECTYPE. If the definition of
7650 OPERAND is vect_uninitialized_def, vect_constant_def or
7651 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7652 is responsible to compute the best suited vector type for the
7656 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7657 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7658 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
7660 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
7664 /* Now get a vector type if the def is internal, otherwise supply
7665 NULL_TREE and leave it up to the caller to figure out a proper
7666 type for the use stmt. */
7667 if (*dt
== vect_internal_def
7668 || *dt
== vect_induction_def
7669 || *dt
== vect_reduction_def
7670 || *dt
== vect_double_reduction_def
7671 || *dt
== vect_nested_cycle
)
7673 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
7675 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7676 && !STMT_VINFO_RELEVANT (stmt_info
)
7677 && !STMT_VINFO_LIVE_P (stmt_info
))
7678 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7680 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7681 gcc_assert (*vectype
!= NULL_TREE
);
7683 else if (*dt
== vect_uninitialized_def
7684 || *dt
== vect_constant_def
7685 || *dt
== vect_external_def
)
7686 *vectype
= NULL_TREE
;
7694 /* Function supportable_widening_operation
7696 Check whether an operation represented by the code CODE is a
7697 widening operation that is supported by the target platform in
7698 vector form (i.e., when operating on arguments of type VECTYPE_IN
7699 producing a result of type VECTYPE_OUT).
7701 Widening operations we currently support are NOP (CONVERT), FLOAT
7702 and WIDEN_MULT. This function checks if these operations are supported
7703 by the target platform either directly (via vector tree-codes), or via
7707 - CODE1 and CODE2 are codes of vector operations to be used when
7708 vectorizing the operation, if available.
7709 - MULTI_STEP_CVT determines the number of required intermediate steps in
7710 case of multi-step conversion (like char->short->int - in that case
7711 MULTI_STEP_CVT will be 1).
7712 - INTERM_TYPES contains the intermediate type required to perform the
7713 widening operation (short in the above example). */
7716 supportable_widening_operation (enum tree_code code
, gimple stmt
,
7717 tree vectype_out
, tree vectype_in
,
7718 enum tree_code
*code1
, enum tree_code
*code2
,
7719 int *multi_step_cvt
,
7720 vec
<tree
> *interm_types
)
7722 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7723 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7724 struct loop
*vect_loop
= NULL
;
7725 enum machine_mode vec_mode
;
7726 enum insn_code icode1
, icode2
;
7727 optab optab1
, optab2
;
7728 tree vectype
= vectype_in
;
7729 tree wide_vectype
= vectype_out
;
7730 enum tree_code c1
, c2
;
7732 tree prev_type
, intermediate_type
;
7733 enum machine_mode intermediate_mode
, prev_mode
;
7734 optab optab3
, optab4
;
7736 *multi_step_cvt
= 0;
7738 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
7742 case WIDEN_MULT_EXPR
:
7743 /* The result of a vectorized widening operation usually requires
7744 two vectors (because the widened results do not fit into one vector).
7745 The generated vector results would normally be expected to be
7746 generated in the same order as in the original scalar computation,
7747 i.e. if 8 results are generated in each vector iteration, they are
7748 to be organized as follows:
7749 vect1: [res1,res2,res3,res4],
7750 vect2: [res5,res6,res7,res8].
7752 However, in the special case that the result of the widening
7753 operation is used in a reduction computation only, the order doesn't
7754 matter (because when vectorizing a reduction we change the order of
7755 the computation). Some targets can take advantage of this and
7756 generate more efficient code. For example, targets like Altivec,
7757 that support widen_mult using a sequence of {mult_even,mult_odd}
7758 generate the following vectors:
7759 vect1: [res1,res3,res5,res7],
7760 vect2: [res2,res4,res6,res8].
7762 When vectorizing outer-loops, we execute the inner-loop sequentially
7763 (each vectorized inner-loop iteration contributes to VF outer-loop
7764 iterations in parallel). We therefore don't allow to change the
7765 order of the computation in the inner-loop during outer-loop
7767 /* TODO: Another case in which order doesn't *really* matter is when we
7768 widen and then contract again, e.g. (short)((int)x * y >> 8).
7769 Normally, pack_trunc performs an even/odd permute, whereas the
7770 repack from an even/odd expansion would be an interleave, which
7771 would be significantly simpler for e.g. AVX2. */
7772 /* In any case, in order to avoid duplicating the code below, recurse
7773 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7774 are properly set up for the caller. If we fail, we'll continue with
7775 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7777 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
7778 && !nested_in_vect_loop_p (vect_loop
, stmt
)
7779 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
7780 stmt
, vectype_out
, vectype_in
,
7781 code1
, code2
, multi_step_cvt
,
7784 c1
= VEC_WIDEN_MULT_LO_EXPR
;
7785 c2
= VEC_WIDEN_MULT_HI_EXPR
;
7788 case VEC_WIDEN_MULT_EVEN_EXPR
:
7789 /* Support the recursion induced just above. */
7790 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
7791 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
7794 case WIDEN_LSHIFT_EXPR
:
7795 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
7796 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
7800 c1
= VEC_UNPACK_LO_EXPR
;
7801 c2
= VEC_UNPACK_HI_EXPR
;
7805 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
7806 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
7809 case FIX_TRUNC_EXPR
:
7810 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7811 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7812 computing the operation. */
7819 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
7821 enum tree_code ctmp
= c1
;
7826 if (code
== FIX_TRUNC_EXPR
)
7828 /* The signedness is determined from output operand. */
7829 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7830 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
7834 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7835 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
7838 if (!optab1
|| !optab2
)
7841 vec_mode
= TYPE_MODE (vectype
);
7842 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
7843 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
7849 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7850 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7853 /* Check if it's a multi-step conversion that can be done using intermediate
7856 prev_type
= vectype
;
7857 prev_mode
= vec_mode
;
7859 if (!CONVERT_EXPR_CODE_P (code
))
7862 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7863 intermediate steps in promotion sequence. We try
7864 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
7866 interm_types
->create (MAX_INTERM_CVT_STEPS
);
7867 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
7869 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
7871 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
7872 TYPE_UNSIGNED (prev_type
));
7873 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7874 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
7876 if (!optab3
|| !optab4
7877 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
7878 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
7879 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
7880 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
7881 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
7882 == CODE_FOR_nothing
)
7883 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
7884 == CODE_FOR_nothing
))
7887 interm_types
->quick_push (intermediate_type
);
7888 (*multi_step_cvt
)++;
7890 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
7891 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
7894 prev_type
= intermediate_type
;
7895 prev_mode
= intermediate_mode
;
7898 interm_types
->release ();
7903 /* Function supportable_narrowing_operation
7905 Check whether an operation represented by the code CODE is a
7906 narrowing operation that is supported by the target platform in
7907 vector form (i.e., when operating on arguments of type VECTYPE_IN
7908 and producing a result of type VECTYPE_OUT).
7910 Narrowing operations we currently support are NOP (CONVERT) and
7911 FIX_TRUNC. This function checks if these operations are supported by
7912 the target platform directly via vector tree-codes.
7915 - CODE1 is the code of a vector operation to be used when
7916 vectorizing the operation, if available.
7917 - MULTI_STEP_CVT determines the number of required intermediate steps in
7918 case of multi-step conversion (like int->short->char - in that case
7919 MULTI_STEP_CVT will be 1).
7920 - INTERM_TYPES contains the intermediate type required to perform the
7921 narrowing operation (short in the above example). */
7924 supportable_narrowing_operation (enum tree_code code
,
7925 tree vectype_out
, tree vectype_in
,
7926 enum tree_code
*code1
, int *multi_step_cvt
,
7927 vec
<tree
> *interm_types
)
7929 enum machine_mode vec_mode
;
7930 enum insn_code icode1
;
7931 optab optab1
, interm_optab
;
7932 tree vectype
= vectype_in
;
7933 tree narrow_vectype
= vectype_out
;
7935 tree intermediate_type
;
7936 enum machine_mode intermediate_mode
, prev_mode
;
7940 *multi_step_cvt
= 0;
7944 c1
= VEC_PACK_TRUNC_EXPR
;
7947 case FIX_TRUNC_EXPR
:
7948 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
7952 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
7953 tree code and optabs used for computing the operation. */
7960 if (code
== FIX_TRUNC_EXPR
)
7961 /* The signedness is determined from output operand. */
7962 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
7964 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
7969 vec_mode
= TYPE_MODE (vectype
);
7970 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
7975 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
7978 /* Check if it's a multi-step conversion that can be done using intermediate
7980 prev_mode
= vec_mode
;
7981 if (code
== FIX_TRUNC_EXPR
)
7982 uns
= TYPE_UNSIGNED (vectype_out
);
7984 uns
= TYPE_UNSIGNED (vectype
);
7986 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
7987 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
7988 costly than signed. */
7989 if (code
== FIX_TRUNC_EXPR
&& uns
)
7991 enum insn_code icode2
;
7994 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
7996 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
7997 if (interm_optab
!= unknown_optab
7998 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
7999 && insn_data
[icode1
].operand
[0].mode
8000 == insn_data
[icode2
].operand
[0].mode
)
8003 optab1
= interm_optab
;
8008 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8009 intermediate steps in promotion sequence. We try
8010 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8011 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8012 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8014 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8016 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8018 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8021 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8022 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8023 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8024 == CODE_FOR_nothing
))
8027 interm_types
->quick_push (intermediate_type
);
8028 (*multi_step_cvt
)++;
8030 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8033 prev_mode
= intermediate_mode
;
8034 optab1
= interm_optab
;
8037 interm_types
->release ();